This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(tswge)
library(nnfor)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts -------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(vars)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
##
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
##
## boundary
## Loading required package: urca
## Loading required package: lmtest
us<-read.csv("/Data Folder/University/SMU/R for Time Series/Project/US_daily.csv",header=T)
head(us,2)
## date states positive negative pending hospitalizedCurrently
## 1 20200715 56 3478419 39042608 2947 56147
## 2 20200714 56 3413313 38351244 960 55509
## hospitalizedCumulative inIcuCurrently inIcuCumulative onVentilatorCurrently
## 1 269507 6326 12002 2322
## 2 267127 6233 11857 2263
## onVentilatorCumulative recovered dateChecked death hospitalized
## 1 1166 1075882 2020-07-15T00:00:00Z 129595 269507
## 2 1154 1049098 2020-07-14T00:00:00Z 128740 267127
## lastModified total totalTestResults posNeg deathIncrease
## 1 2020-07-15T00:00:00Z 42523974 42521027 42521027 855
## 2 2020-07-14T00:00:00Z 41765517 41764557 41764557 736
## hospitalizedIncrease negativeIncrease positiveIncrease
## 1 2380 691364 65106
## 2 2262 697403 62879
## totalTestResultsIncrease hash
## 1 756470 4e2fa5f3baa2968cef59f8395ece2f173d078291
## 2 760282 dc2b411ce1df07a2a25be3afa90ad1e0402bd8d6
#plot daily test positive count
plotts.wge(us$positiveIncrease)
# exclude early days only sick people gets tested
us1<-subset.data.frame(us,date>20200303) # data looks reasonable
us2<-subset.data.frame(us,date>20200503) # ensure the test positive rate is reasonable, for example <10%
summary.data.frame(us2)
## date states positive negative
## Min. :20200504 Min. :56 Min. :1185985 Min. : 6141286
## 1st Qu.:20200522 1st Qu.:56 1st Qu.:1607510 1st Qu.:11893006
## Median :20200609 Median :56 Median :1976557 Median :19116676
## Mean :20200597 Mean :56 Mean :2090309 Mean :20236873
## 3rd Qu.:20200627 3rd Qu.:56 3rd Qu.:2499250 3rd Qu.:27947034
## Max. :20200715 Max. :56 Max. :3478419 Max. :39042608
## pending hospitalizedCurrently hospitalizedCumulative inIcuCurrently
## Min. : 960 Min. :27770 Min. :139220 Min. : 5163
## 1st Qu.:1891 1st Qu.:31468 1st Qu.:182385 1st Qu.: 5628
## Median :2208 Median :37259 Median :219642 Median : 6402
## Mean :2466 Mean :38138 Mean :212301 Mean : 7496
## 3rd Qu.:2978 3rd Qu.:43004 3rd Qu.:239576 3rd Qu.: 9048
## Max. :4084 Max. :56147 Max. :269507 Max. :12696
## inIcuCumulative onVentilatorCurrently onVentilatorCumulative recovered
## Min. : 4579 Min. :1982 Min. : 430.0 Min. : 186752
## 1st Qu.: 7689 1st Qu.:2248 1st Qu.: 633.0 1st Qu.: 350135
## Median : 9141 Median :3090 Median : 771.0 Median : 609476
## Mean : 8923 Mean :3562 Mean : 800.9 Mean : 587703
## 3rd Qu.:10415 3rd Qu.:4716 3rd Qu.: 977.0 3rd Qu.: 772465
## Max. :12002 Max. :7070 Max. :1166.0 Max. :1075882
## dateChecked death hospitalized lastModified
## Length:73 Min. : 63653 Min. :139220 Length:73
## Class :character 1st Qu.: 90547 1st Qu.:182385 Class :character
## Mode :character Median :105838 Median :219642 Mode :character
## Mean :103152 Mean :212301
## 3rd Qu.:118955 3rd Qu.:239576
## Max. :129595 Max. :269507
## total totalTestResults posNeg deathIncrease
## Min. : 7330062 Min. : 7327271 Min. : 7327271 Min. : 209.0
## 1st Qu.:13504225 1st Qu.:13500516 1st Qu.:13500516 1st Qu.: 640.0
## Median :21094894 Median :21093233 Median :21093233 Median : 823.0
## Mean :22329648 Mean :22327182 Mean :22327182 Mean : 917.2
## 3rd Qu.:30448470 3rd Qu.:30446284 3rd Qu.:30446284 3rd Qu.:1024.0
## Max. :42523974 Max. :42521027 Max. :42521027 Max. :2740.0
## hospitalizedIncrease negativeIncrease positiveIncrease
## Min. :-2841 Min. :208933 Min. :16629
## 1st Qu.: 1040 1st Qu.:368410 1st Qu.:21319
## Median : 1435 Median :439702 Median :24701
## Mean : 1807 Mean :453565 Mean :31712
## 3rd Qu.: 1856 3rd Qu.:559083 3rd Qu.:41857
## Max. :17320 Max. :756730 Max. :66645
## totalTestResultsIncrease hash
## Min. :231456 Length:73
## 1st Qu.:390000 Class :character
## Median :462378 Mode :character
## Mean :485277
## 3rd Qu.:602866
## Max. :823375
#select all possible correlated variables for daily test positive rate
date = rev(us2$date)
hospitalized=rev(us2$hospitalizedCurrently)
inIcu=rev(us2$inIcuCurrently)
onVent=rev(us2$onVentilatorCurrently)
recovered=rev(us2$recovered)
deathIncrease=rev(us2$deathIncrease)
negativeIncrease=rev(us2$negativeIncrease)
hospitalizedIncrease=rev(us2$hospitalizedIncrease)
pending=rev(us2$pending)
us3 = data.frame (hospitalized,inIcu,onVent,deathIncrease,negativeIncrease,hospitalizedIncrease)
us3$posrate <- rev((us2$positiveIncrease/us2$totalTestResultsIncrease)*100)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
ggpairs(us3)
#us2$hospitalizedCurrently 0.722
#us2$deathIncrease 0.24
#us2$inIcuCurrently 0.205
# we'll move forward for multivariant analysis factor these 3 variables
hist(us3$posrate, plot=T)
hist(us2$hospitalizedCurrently)
hist(us2$deathIncrease)
hist(us2$inIcuCurrently)
# clean dataset ready for modeling analaysis
us4 = data.frame(posrate=us3$posrate, hospitalized=us3$hospitalized,deathIncrease=us3$deathIncrease,inIcu=us3$inIcu)
summary(us4)
## posrate hospitalized deathIncrease inIcu
## Min. : 3.803 Min. :27770 Min. : 209.0 Min. : 5163
## 1st Qu.: 5.014 1st Qu.:31468 1st Qu.: 640.0 1st Qu.: 5628
## Median : 5.918 Median :37259 Median : 823.0 Median : 6402
## Mean : 6.446 Mean :38138 Mean : 917.2 Mean : 7496
## 3rd Qu.: 8.094 3rd Qu.:43004 3rd Qu.:1024.0 3rd Qu.: 9048
## Max. :10.234 Max. :56147 Max. :2740.0 Max. :12696
length(us4$posrate)#73
## [1] 73
is.na.data.frame(us4)# no missing value
## posrate hospitalized deathIncrease inIcu
## [1,] FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE FALSE
## [20,] FALSE FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE FALSE
## [25,] FALSE FALSE FALSE FALSE
## [26,] FALSE FALSE FALSE FALSE
## [27,] FALSE FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE
## [32,] FALSE FALSE FALSE FALSE
## [33,] FALSE FALSE FALSE FALSE
## [34,] FALSE FALSE FALSE FALSE
## [35,] FALSE FALSE FALSE FALSE
## [36,] FALSE FALSE FALSE FALSE
## [37,] FALSE FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE FALSE
## [39,] FALSE FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE FALSE
## [41,] FALSE FALSE FALSE FALSE
## [42,] FALSE FALSE FALSE FALSE
## [43,] FALSE FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE FALSE
## [45,] FALSE FALSE FALSE FALSE
## [46,] FALSE FALSE FALSE FALSE
## [47,] FALSE FALSE FALSE FALSE
## [48,] FALSE FALSE FALSE FALSE
## [49,] FALSE FALSE FALSE FALSE
## [50,] FALSE FALSE FALSE FALSE
## [51,] FALSE FALSE FALSE FALSE
## [52,] FALSE FALSE FALSE FALSE
## [53,] FALSE FALSE FALSE FALSE
## [54,] FALSE FALSE FALSE FALSE
## [55,] FALSE FALSE FALSE FALSE
## [56,] FALSE FALSE FALSE FALSE
## [57,] FALSE FALSE FALSE FALSE
## [58,] FALSE FALSE FALSE FALSE
## [59,] FALSE FALSE FALSE FALSE
## [60,] FALSE FALSE FALSE FALSE
## [61,] FALSE FALSE FALSE FALSE
## [62,] FALSE FALSE FALSE FALSE
## [63,] FALSE FALSE FALSE FALSE
## [64,] FALSE FALSE FALSE FALSE
## [65,] FALSE FALSE FALSE FALSE
## [66,] FALSE FALSE FALSE FALSE
## [67,] FALSE FALSE FALSE FALSE
## [68,] FALSE FALSE FALSE FALSE
## [69,] FALSE FALSE FALSE FALSE
## [70,] FALSE FALSE FALSE FALSE
## [71,] FALSE FALSE FALSE FALSE
## [72,] FALSE FALSE FALSE FALSE
## [73,] FALSE FALSE FALSE FALSE
#Create a line chart for daily test positive and death rate
# data used are from March 3rd to July 15th
# get the range for the x and y axis
xt=c(1:length(us1$date))
xrange <- range(xt)
# get the test positive rate
dposrate <- us1$positiveIncrease/(us1$positiveIncrease+us1$negativeIncrease)
dposrater<-rev(dposrate)
yrange <- range(dposrater)
# get the death to positive rate
ddrate <- us1$deathIncrease/us1$positiveIncrease
ddrater<-rev(ddrate)
# plot the graph
plot(xt, dposrater, type="o", lwd=0.5,
lty=1, col='blue', pch=15,cex=0.4,xlab="Date (March 04 to July 15)", ylab="Daily Test Positive / Death Rate")
lines(xt, ddrater, type="o", lwd=0.5,
lty=2, col='red', pch=16,cex=0.4)
# add a title and subtitle
title("Daily US Test Positive & Death Rate")
# add a legend
legend(100, yrange[2], legend=c("Positive", "Death"), cex=0.8, col=c("blue","red"), lty=1:2)
#Create a line chart for daily test positive and death count (March 03, to July 15)
# get the range for the x and y axis
# get the test positive count
dpos <- us1$positiveIncrease
dposr<-rev(dpos)
yrange <- range(dposr)
# get the death count
dd <- us1$deathIncrease
ddr<-rev(dd)
# plot the graph
plot(xt, dposr, type="o", lwd=0.5,
lty=1, col='blue', pch=15,cex=0.4,xlab="Date (March 04 to July 15)", ylab="Daily Test Positive / Death Count")
lines(xt, ddr, type="o", lwd=0.5,
lty=2, col='red', pch=16,cex=0.4)
# add a title and subtitle
title("Daily US Test Positive & Death Count")
# add a legend
legend(5, yrange[2], legend=c("Positive", "Death"), cex=0.8, col=c("blue","red"), lty=1:2)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
plotts.sample.wge(dposrater)
## $autplt
## [1] 1.0000000 0.8869272 0.8680812 0.8503183 0.7903073 0.7813791 0.7607400
## [8] 0.7480428 0.7296608 0.7389640 0.7264490 0.6756612 0.6958569 0.6656387
## [15] 0.6640534 0.6502526 0.6021160 0.5909614 0.5680928 0.5558950 0.5188207
## [22] 0.5025282 0.4917558 0.4733021 0.4587867 0.4347135
##
## $freq
## [1] 0.007462687 0.014925373 0.022388060 0.029850746 0.037313433 0.044776119
## [7] 0.052238806 0.059701493 0.067164179 0.074626866 0.082089552 0.089552239
## [13] 0.097014925 0.104477612 0.111940299 0.119402985 0.126865672 0.134328358
## [19] 0.141791045 0.149253731 0.156716418 0.164179104 0.171641791 0.179104478
## [25] 0.186567164 0.194029851 0.201492537 0.208955224 0.216417910 0.223880597
## [31] 0.231343284 0.238805970 0.246268657 0.253731343 0.261194030 0.268656716
## [37] 0.276119403 0.283582090 0.291044776 0.298507463 0.305970149 0.313432836
## [43] 0.320895522 0.328358209 0.335820896 0.343283582 0.350746269 0.358208955
## [49] 0.365671642 0.373134328 0.380597015 0.388059701 0.395522388 0.402985075
## [55] 0.410447761 0.417910448 0.425373134 0.432835821 0.440298507 0.447761194
## [61] 0.455223881 0.462686567 0.470149254 0.477611940 0.485074627 0.492537313
## [67] 0.500000000
##
## $db
## [1] 17.1849025 -1.9573945 -1.0222534 -0.2746971 0.1276307 -0.8095633
## [7] -0.1818880 -4.8735910 -3.0803263 -0.3094095 -1.2016340 -2.6269452
## [13] -5.4361635 -12.8869678 -4.7396201 -2.7708140 -5.4613032 -15.1177175
## [19] -8.2619075 -7.8753721 -17.5877733 -10.9327719 -18.6492801 -25.4145610
## [25] -22.8167144 -27.0042665 -11.5344329 -6.5467609 -3.6915075 -16.8663437
## [31] -18.5196997 -11.0480780 -16.1299132 -21.8076343 -20.1105694 -12.1691103
## [37] -8.4572314 -12.6006590 -16.1507152 -11.2128581 -10.9418647 -6.9043297
## [43] -3.7563111 -9.8600573 -6.8303428 -6.2096606 -6.3149538 -21.4387545
## [49] -28.4882072 -12.2443745 -7.3894809 -11.0501004 -11.7172251 -5.6489935
## [55] -5.6240358 -8.0648930 -7.2319620 -9.2171920 -27.4439154 -13.6211039
## [61] -14.5765678 -25.7861235 -21.0316946 -9.7190535 -7.5058779 -21.0261805
## [67] -25.8419347
##
## $dbz
## [1] 11.2044994 10.6420938 9.6987222 8.3691728 6.6577431 4.6018827
## [7] 2.3240087 0.1043517 -1.6562611 -2.7603493 -3.4161863 -3.9208749
## [13] -4.3990158 -4.8457504 -5.2510187 -5.6612031 -6.1561114 -6.7978773
## [19] -7.6022728 -8.5349977 -9.5158106 -10.4182676 -11.0746386 -11.3331326
## [25] -11.1779644 -10.7728953 -10.3481431 -10.0803792 -10.0618176 -10.3151049
## [31] -10.8067007 -11.4461370 -12.0799893 -12.5105025 -12.5734060 -12.2358892
## [37] -11.6013733 -10.8201247 -10.0171717 -9.2768827 -8.6534957 -8.1828926
## [43] -7.8892288 -7.7866508 -7.8766836 -8.1409253 -8.5288240 -8.9450569
## [49] -9.2546628 -9.3337224 -9.1515757 -8.7982640 -8.4211404 -8.1502092
## [55] -8.0705121 -8.2267796 -8.6328537 -9.2732931 -10.0939742 -10.9851736
## [61] -11.7778687 -12.2967709 -12.4713181 -12.3858987 -12.1979826 -12.0379928
## [67] -11.9775222
# ACF slow dumping, a tiny cyclic behavior. Parzen window show strong frequency at 0, weak frequency at 0.3 and 0.4.
# try ARMA model directly
is.na(dposrater)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE
aic5.wge(dposrater,p=0:10, q=0:5)
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 63 10 2 -7.408574
## 51 8 2 -7.389009
## 64 10 3 -7.373691
## 62 10 1 -7.326196
## 44 7 1 -7.303328
# AIC lingers around -7.x, doesn't change much. aic5.wge always prefer to pick high order p&q
# try to take first diff
dposrater_d1<-artrans.wge(dposrater,phi.tr = 1)
acf(dposrater_d1)
# ACF looks much better, still 3 lags are above the limit line.
aic5.wge(dposrater_d1, p=0:10, q=0:4)
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 52 10 1 -7.473906
## 38 7 2 -7.472336
## 51 10 0 -7.466801
## 53 10 2 -7.461169
## 49 9 3 -7.385656
# still pick 10, 1.AIC lingers around -7.4 is there any other thing we can do here?
aic5.wge(dposrater_d1, p=0:10, q=0:4, type="bic")
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of bic
## p q bic
## 2 0 1 -7.258225
## 38 7 2 -7.255016
## 51 10 0 -7.227750
## 7 1 1 -7.223371
## 4 0 3 -7.213250
# BIC pick 0, 1, BIC lingers around -7.2
# lets first test out the residual
estd1aic <- est.arma.wge(dposrater_d1, p=10, q=1, factor = T)
##
## Coefficients of Original polynomial:
## -1.0004 -0.5116 -0.2541 -0.4236 -0.2947 -0.0075 0.0816 0.0384 0.2274 0.3758
##
## Factor Roots Abs Recip System Freq
## 1+0.9251B+0.9084B^2 -0.5092+-0.9174i 0.9531 0.3306
## 1+1.6248B+0.9078B^2 -0.8949+-0.5484i 0.9528 0.4125
## 1+0.9132B -1.0951 0.9132 0.5000
## 1-1.2801B+0.8217B^2 0.7789+-0.7812i 0.9065 0.1252
## 1-0.3765B+0.7533B^2 0.2499+-1.1247i 0.8680 0.2152
## 1-0.8061B 1.2405 0.8061 0.0000
##
##
fore.aruma.wge(dposrater,d=1, phi = estd1aic$phi,theta = estd1aic$theta, n.ahead = 40, lastn= T, limits=F )
## $f
## [1] 0.04553089 0.04284428 0.04598659 0.04573289 0.04300856 0.04402237
## [7] 0.04244493 0.04218625 0.04479960 0.04345762 0.04318432 0.04351993
## [13] 0.04374729 0.04241314 0.04316301 0.04321173 0.04226448 0.04391539
## [19] 0.04350377 0.04254496 0.04354990 0.04290702 0.04278937 0.04308944
## [25] 0.04326403 0.04278179 0.04325579 0.04346832 0.04258768 0.04322394
## [31] 0.04317545 0.04265355 0.04333698 0.04309370 0.04295115 0.04313800
## [37] 0.04319507 0.04288424 0.04301477 0.04325720
##
## $ll
## [1] 0.0071202237 0.0004555798 -0.0003705090 -0.0042336397 -0.0074844352
## [6] -0.0089012519 -0.0136234585 -0.0165334855 -0.0171122663 -0.0233294827
## [11] -0.0281307554 -0.0289498173 -0.0324613464 -0.0350846677 -0.0362392294
## [16] -0.0401212914 -0.0430718433 -0.0441232729 -0.0473160499 -0.0504298371
## [21] -0.0512563375 -0.0538500256 -0.0562832278 -0.0574269216 -0.0601706854
## [26] -0.0626424013 -0.0638001606 -0.0660241088 -0.0684348504 -0.0695360872
## [31] -0.0715020481 -0.0737697203 -0.0748493632 -0.0768944768 -0.0789867677
## [36] -0.0801692006 -0.0819707425 -0.0839189934 -0.0851676787 -0.0868015242
##
## $ul
## [1] 0.08394155 0.08523298 0.09234368 0.09569942 0.09350155 0.09694600
## [7] 0.09851331 0.10090598 0.10671147 0.11024473 0.11449940 0.11598967
## [13] 0.11995593 0.11991095 0.12256525 0.12654476 0.12760079 0.13195405
## [19] 0.13432359 0.13551977 0.13835615 0.13966406 0.14186198 0.14360580
## [25] 0.14669875 0.14820599 0.15031174 0.15296074 0.15361020 0.15598396
## [31] 0.15785295 0.15907683 0.16152332 0.16308188 0.16488906 0.16644520
## [37] 0.16836087 0.16968746 0.17119722 0.17331592
##
## $resid
## [1] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## [6] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## [11] 0.0000000000 -0.0039999620 -0.0739936114 0.0363590434 0.0213066453
## [16] 0.0472709714 -0.0093265449 0.0281772730 0.0393115959 0.0057687847
## [21] -0.0496757505 0.0145343798 0.0068346192 0.0017334952 0.0240223114
## [26] 0.0064135655 0.0092724695 0.0215753337 0.0196701116 0.0417205695
## [31] 0.0109668264 -0.0932690919 0.0223541072 -0.0106624045 -0.0068957780
## [36] -0.0222751525 0.0183258408 -0.0003240235 0.0198974353 -0.0157488966
## [41] -0.0022873029 -0.0151169221 0.0142354288 -0.0002772878 -0.0159155653
## [46] -0.0074287305 -0.0044220713 -0.0176438040 -0.0061500383 -0.0818353354
## [51] 0.0292037538 -0.0109800898 -0.0267642203 -0.0166364390 -0.0165518568
## [56] 0.0148986276 -0.0150003119 0.0071164033 0.0048502716 0.0073928038
## [61] -0.0352980200 0.0107165961 -0.0319613559 0.0136740260 -0.0059624661
## [66] -0.0089577824 -0.0121959316 -0.0054712390 -0.0316942692 0.0022635168
## [71] 0.0018637173 0.0075889624 -0.0120564437 -0.0008900479 -0.0058186418
## [76] -0.0027530270 -0.0052448308 0.0017408074 0.0023080118 -0.0050652307
## [81] 0.0025575981 -0.0082652545 -0.0045770039 0.0076808857 0.0130910996
## [86] -0.0085683742 -0.0058034786 0.0038407670 0.0042357781 -0.0065341917
## [91] -0.0053809293 -0.0025912040 -0.0004371642 -0.0071759615 -0.0004346580
## [96] -0.0005459094 -0.0025805336 -0.0031556082 0.0086458854 0.0032903552
## [101] -0.0077221452 0.0078777877 -0.0006608687 -0.0016496700 0.0040121631
## [106] 0.0047182790 0.0053271036 0.0017535559 0.0034037426 0.0028891269
## [111] 0.0048646978 0.0073911792 0.0192005983 -0.0106556817 0.0082998347
## [116] 0.0065950189 0.0017933527 -0.0174259314 0.0060903908 0.0178127323
## [121] 0.0036572659 -0.0085257319 0.0057117166 -0.0067023193 0.0140513222
## [126] 0.0009403316 0.0186788346 -0.0088552550 -0.0085178429 0.0144148195
## [131] -0.0019128732 -0.0105996177 -0.0025830497 0.0066987745
##
## $wnv
## [1] 0.0003840533
##
## $se
## [1] 0.01959728 0.02162689 0.02365158 0.02549313 0.02576173 0.02700185
## [7] 0.02860632 0.02995905 0.03158769 0.03407505 0.03638524 0.03697436
## [13] 0.03888196 0.03953970 0.04051135 0.04251685 0.04353894 0.04491768
## [19] 0.04633664 0.04743612 0.04837053 0.04936584 0.05054725 0.05128386
## [25] 0.05277281 0.05378785 0.05462038 0.05586348 0.05664415 0.05753063
## [31] 0.05850893 0.05939963 0.06029915 0.06121846 0.06221322 0.06291184
## [37] 0.06386011 0.06469553 0.06539921 0.06635649
##
## $psi
## [1] 0.4667524 0.4885730 0.4854386 0.1893336 0.4127651 0.4819912 0.4541823
## [8] 0.5108806 0.6521292 0.6510290 0.3354540 0.6138238 0.3664788 0.4500308
## [15] 0.6584592 0.4785622 0.5635229 0.5806522 0.5181269 0.4828035 0.5032814
## [22] 0.5543900 0.4419400 0.6351560 0.5306942 0.4847692 0.5980073 0.4782196
## [29] 0.5133619 0.5436806 0.5229295 0.5294874 0.5393287 0.5654281 0.4770849
## [36] 0.5594762 0.5288127 0.4882284 0.5730714 0.5154248
##
## $ptot
## [1] 11
##
## $phitot
## [1] -0.0003645301 0.4887431558 0.2574946290 -0.1694624292 0.1288714317
## [6] 0.2872520914 0.0890943500 -0.0432114691 0.1889720066 0.1483861169
## [11] -0.3757753530
# as we have 1-B term, the long term forcast is just repeating the last value in dataset.
# try forecast without the 1-B
estaic <- est.arma.wge(dposrater, p=10, q=2, factor = T)
##
## Coefficients of Original polynomial:
## -0.8628 0.2800 0.7777 0.1435 -0.0007 0.2721 0.2637 -0.0886 -0.0682 0.2012
##
## Factor Roots Abs Recip System Freq
## 1-0.9884B 1.0118 0.9884 0.0000
## 1+1.6766B+0.9598B^2 -0.8735+-0.5282i 0.9797 0.4134
## 1+0.9536B+0.8564B^2 -0.5567+-0.9261i 0.9254 0.3361
## 1+0.8930B -1.1198 0.8930 0.5000
## 1-0.5509B+0.5853B^2 0.4707+-1.2195i 0.7650 0.1914
## 1-1.1211B+0.4738B^2 1.1831+-0.8432i 0.6883 0.0986
##
##
fore.aruma.wge(dposrater, phi = estaic$phi,theta = estaic$theta, n.ahead = 80, lastn= F, limits=F )
## $f
## [1] 0.09046394 0.08682474 0.09465579 0.08150466 0.09205723 0.09117187
## [7] 0.08483302 0.09554320 0.08626470 0.09138996 0.09163261 0.08962519
## [13] 0.09365068 0.08850201 0.09558623 0.08912740 0.09349305 0.09453649
## [19] 0.08918413 0.09741067 0.09065554 0.09466005 0.09496027 0.09224209
## [25] 0.09710799 0.09209900 0.09732234 0.09396112 0.09522708 0.09763389
## [31] 0.09299941 0.09911673 0.09451177 0.09696615 0.09796546 0.09501309
## [37] 0.09962530 0.09541843 0.09904662 0.09761238 0.09723579 0.10016115
## [43] 0.09623402 0.10069907 0.09784052 0.09895392 0.10051224 0.09762997
## [49] 0.10156960 0.09839439 0.10069081 0.10047927 0.09927927 0.10220408
## [55] 0.09903463 0.10217107 0.10062851 0.10077426 0.10262217 0.10002406
## [61] 0.10316455 0.10098428 0.10223669 0.10277329 0.10124437 0.10387629
## [67] 0.10146132 0.10353747 0.10293346 0.10246925 0.10436182 0.10217022
## [73] 0.10452970 0.10319300 0.10368460 0.10462987 0.10307120 0.10527228
## [79] 0.10354989 0.10480568
##
## $ll
## [1] 0.0450603030 0.0360765553 0.0394586038 0.0217355573 0.0313823395
## [6] 0.0280196721 0.0182493798 0.0270075314 0.0161361932 0.0191113261
## [11] 0.0168313730 0.0140593268 0.0143314561 0.0082784603 0.0140011780
## [16] 0.0050884636 0.0087067922 0.0078804809 0.0012435823 0.0082674355
## [21] 0.0002025128 0.0031437181 0.0019023519 -0.0014409595 0.0017447383
## [26] -0.0040639257 0.0003033379 -0.0045164234 -0.0037693899 -0.0026162225
## [31] -0.0080280685 -0.0027031338 -0.0082847340 -0.0064375990 -0.0064859741
## [36] -0.0099441726 -0.0062727777 -0.0111413595 -0.0080659490 -0.0104557277
## [41] -0.0112293451 -0.0091342334 -0.0136153056 -0.0096792392 -0.0132606529
## [46] -0.0125462294 -0.0117138267 -0.0149976660 -0.0116373480 -0.0153409002
## [51] -0.0134182806 -0.0142856993 -0.0157982621 -0.0134247817 -0.0170155311
## [56] -0.0142398910 -0.0163146977 -0.0164542463 -0.0151096320 -0.0180267396
## [61] -0.0152634367 -0.0178556909 -0.0168670228 -0.0167890274 -0.0185669142
## [66] -0.0163053378 -0.0190473233 -0.0172233425 -0.0182170092 -0.0188958655
## [71] -0.0173522271 -0.0197974001 -0.0176929723 -0.0193451964 -0.0190466099
## [76] -0.0184241486 -0.0201819722 -0.0182333794 -0.0202097677 -0.0191347795
##
## $ul
## [1] 0.1358676 0.1375729 0.1498530 0.1412738 0.1527321 0.1543241 0.1514167
## [8] 0.1640789 0.1563932 0.1636686 0.1664338 0.1651911 0.1729699 0.1687256
## [15] 0.1771713 0.1731663 0.1782793 0.1811925 0.1771247 0.1865539 0.1811086
## [22] 0.1861764 0.1880182 0.1859251 0.1924712 0.1882619 0.1943413 0.1924387
## [29] 0.1942236 0.1978840 0.1940269 0.2009366 0.1973083 0.2003699 0.2024169
## [36] 0.1999704 0.2055234 0.2019782 0.2061592 0.2056805 0.2057009 0.2094565
## [43] 0.2060833 0.2110774 0.2089417 0.2104541 0.2127383 0.2102576 0.2147766
## [50] 0.2121297 0.2147999 0.2152442 0.2143568 0.2178329 0.2150848 0.2185820
## [57] 0.2175717 0.2180028 0.2203540 0.2180749 0.2215925 0.2198242 0.2213404
## [64] 0.2223356 0.2210557 0.2240579 0.2219700 0.2242983 0.2240839 0.2238344
## [71] 0.2260759 0.2241378 0.2267524 0.2257312 0.2264158 0.2276839 0.2263244
## [78] 0.2287779 0.2273095 0.2287461
##
## $resid
## [1] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [6] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [11] -4.043851e-02 8.039758e-02 -1.178948e-01 6.774276e-02 -4.961026e-03
## [16] -1.405217e-03 1.261723e-03 7.705120e-03 5.090656e-02 1.557385e-02
## [21] -4.904113e-02 1.034379e-02 4.621164e-03 1.752960e-02 2.479506e-02
## [26] 5.099403e-03 2.599601e-02 1.707985e-02 2.415617e-02 3.813433e-02
## [31] -1.169165e-03 -7.358963e-02 3.045081e-02 -4.703371e-03 6.294692e-03
## [36] -1.591581e-02 2.501523e-02 6.789122e-03 2.316101e-02 -2.539909e-02
## [41] -1.304472e-02 -1.250431e-02 2.079005e-02 9.690281e-03 -1.748688e-02
## [46] 2.721836e-03 -2.897509e-03 -1.898228e-02 -3.483165e-03 -9.292373e-02
## [51] 3.826990e-02 -7.866898e-03 -2.098428e-02 -1.517631e-02 -2.628232e-02
## [56] 1.584698e-02 -2.586034e-02 1.393126e-03 -3.667954e-03 3.529978e-03
## [61] -2.519869e-02 4.111394e-03 -4.114971e-02 1.598877e-02 -8.674887e-03
## [66] -1.043706e-02 -6.741850e-03 -1.375786e-02 -3.100093e-02 -3.354298e-03
## [71] -3.655560e-03 4.943688e-03 -1.335403e-02 3.733053e-05 -1.251938e-02
## [76] -7.850640e-03 -1.083139e-02 -5.134337e-03 1.773355e-03 -3.126970e-03
## [81] 1.573347e-03 -1.257067e-02 -7.929839e-03 2.857129e-03 8.297269e-03
## [86] -8.866218e-03 -8.367533e-03 3.506115e-03 2.273695e-03 -8.890312e-03
## [91] -8.980982e-03 -4.752187e-03 -8.005004e-04 -6.603473e-03 -2.808370e-03
## [96] -4.885122e-03 -3.867527e-03 -4.188559e-03 4.154389e-03 -4.999517e-04
## [101] -1.033769e-02 5.803065e-03 -3.228714e-03 -3.440704e-03 1.327057e-03
## [106] 2.176459e-03 4.377936e-03 4.616404e-04 1.862384e-03 -2.761838e-04
## [111] 2.919216e-03 6.696175e-03 1.746019e-02 -1.199935e-02 9.930755e-03
## [116] 4.598299e-03 7.677183e-04 -1.641156e-02 4.589696e-03 1.908465e-02
## [121] 5.195409e-03 -6.655687e-03 3.733049e-03 -9.313651e-03 1.467113e-02
## [126] -1.439113e-03 1.655986e-02 -7.256988e-03 -4.756600e-03 1.496201e-02
## [131] -5.703629e-03 -1.131819e-02 -2.416954e-03 7.713896e-03
##
## $wnv
## [1] 0.0005366229
##
## $se
## [1] 0.02316512 0.02589193 0.02816183 0.03049444 0.03095657 0.03222051
## [7] 0.03397124 0.03496718 0.03577985 0.03687685 0.03816390 0.03855401
## [13] 0.04046899 0.04093038 0.04162503 0.04287701 0.04325829 0.04421225
## [19] 0.04486763 0.04548124 0.04614951 0.04669201 0.04747853 0.04779747
## [25] 0.04865472 0.04906272 0.04949949 0.05024364 0.05050840 0.05114802
## [31] 0.05154463 0.05194891 0.05244719 0.05275702 0.05329155 0.05354963
## [37] 0.05402963 0.05436724 0.05464927 0.05513679 0.05533935 0.05576295
## [43] 0.05604557 0.05631546 0.05668427 0.05688783 0.05725820 0.05746308
## [49] 0.05775865 0.05802821 0.05821893 0.05855355 0.05871303 0.05899432
## [55] 0.05920927 0.05939335 0.05966490 0.05981046 0.06006724 0.06023000
## [61] 0.06042244 0.06063264 0.06076720 0.06100118 0.06112821 0.06131716
## [67] 0.06148400 0.06161266 0.06181147 0.06192098 0.06209900 0.06222838
## [73] 0.06235851 0.06251949 0.06261796 0.06278266 0.06288427 0.06301309
## [79] 0.06314268 0.06323493
##
## $psi
## [1] 0.4992795 0.4781693 0.5049441 0.2300463 0.3857512 0.4647053 0.3576937
## [8] 0.3273234 0.3853964 0.4242387 0.2361628 0.5310360 0.2645523 0.3269031
## [15] 0.4440151 0.2473899 0.3943306 0.3298384 0.3214210 0.3378002 0.3063638
## [22] 0.3715173 0.2379648 0.3925316 0.2725702 0.2832361 0.3719104 0.2229559
## [29] 0.3480900 0.2754991 0.2792302 0.3113476 0.2464553 0.3250166 0.2266771
## [36] 0.3102086 0.2611431 0.2393638 0.3158174 0.2042127 0.2961446 0.2426639
## [43] 0.2377196 0.2786791 0.2075629 0.2806797 0.2092867 0.2519187 0.2411690
## [50] 0.2032610 0.2698464 0.1866796 0.2483961 0.2175945 0.2017055 0.2454550
## [57] 0.1800241 0.2395054 0.1910126 0.2080086 0.2177554 0.1744782 0.2304214
## [64] 0.1700290 0.2076393 0.1953973 0.1717944 0.2138359 0.1589041 0.2028412
## [71] 0.1731304 0.1738147 0.1935511 0.1515379 0.1961832 0.1542579 0.1738450
## [78] 0.1745446 0.1473932 0.1847774
##
## $ptot
## [1] 10
##
## $phitot
## [1] -0.8628300742 0.2799978066 0.7777257868 0.1435384257 -0.0006915594
## [6] 0.2721321259 0.2637148325 -0.0886366867 -0.0682142288 0.2011735763
# now it is very slowly trending towards dataset mean because it is a stationary model
# what about BIC pick simple model?
estd1bic <- est.arma.wge(dposrater_d1, p=0, q=1, factor = T)
fore.aruma.wge(dposrater,d=1, phi = estd1bic$phi,theta = estd1bic$theta, n.ahead = 40, lastn= F, limits=F )
## $f
## [1] 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329
## [7] 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329
## [13] 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329
## [19] 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329
## [25] 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329
## [31] 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329
## [37] 0.08489329 0.08489329 0.08489329 0.08489329
##
## $ll
## [1] 0.033524177 0.029836716 0.026381181 0.023118639 0.020019967
## [6] 0.017062702 0.014229090 0.011504805 0.008878092 0.006339163
## [11] 0.003879764 0.001492858 -0.000827609 -0.003086896 -0.005289600
## [16] -0.007439771 -0.009540998 -0.011596478 -0.013609075 -0.015581366
## [21] -0.017515679 -0.019414128 -0.021278637 -0.023110963 -0.024912717
## [26] -0.026685381 -0.028430319 -0.030148793 -0.031841972 -0.033510941
## [31] -0.035156710 -0.036780220 -0.038382351 -0.039963925 -0.041525714
## [36] -0.043068444 -0.044592793 -0.046099406 -0.047588886 -0.049061805
##
## $ul
## [1] 0.1362624 0.1399499 0.1434054 0.1466679 0.1497666 0.1527239 0.1555575
## [8] 0.1582818 0.1609085 0.1634474 0.1659068 0.1682937 0.1706142 0.1728735
## [15] 0.1750762 0.1772263 0.1793276 0.1813831 0.1833956 0.1853679 0.1873023
## [22] 0.1892007 0.1910652 0.1928975 0.1946993 0.1964720 0.1982169 0.1999354
## [29] 0.2016285 0.2032975 0.2049433 0.2065668 0.2081689 0.2097505 0.2113123
## [36] 0.2128550 0.2143794 0.2158860 0.2173755 0.2188484
##
## $resid
## [1] 0.000000e+00 7.766441e-02 -2.087104e-02 8.347763e-02 8.108788e-02
## [6] -4.868125e-02 -5.979305e-02 -7.581236e-02 -5.616936e-02 -5.117963e-02
## [11] 5.527362e-02 -5.122946e-06 -7.698556e-02 7.376985e-02 -3.676559e-02
## [16] 1.723690e-02 1.486201e-02 -9.605920e-03 4.583775e-02 2.131918e-02
## [21] -2.989453e-02 -1.961301e-02 8.828622e-03 1.752415e-02 9.813993e-03
## [26] 4.581003e-02 -1.135203e-02 2.801457e-02 3.486975e-02 1.945107e-02
## [31] 1.681491e-02 -8.497345e-02 1.624352e-02 -1.318626e-02 8.117023e-04
## [36] 8.247929e-03 -7.473808e-04 1.520034e-02 1.556065e-02 -1.309813e-02
## [41] -2.117926e-02 -3.143723e-02 3.299033e-02 -1.212708e-02 5.194006e-03
## [46] -6.495391e-03 -1.729115e-02 -1.148272e-02 -1.234142e-02 -9.050180e-02
## [51] 1.988479e-02 -7.754294e-03 -1.965136e-02 -9.351329e-03 -2.562852e-02
## [56] -6.593893e-03 -1.230225e-02 6.011534e-03 -1.165136e-02 -1.008836e-03
## [61] -9.792586e-03 -1.765100e-02 -2.546018e-02 4.003576e-03 -8.745599e-03
## [66] -3.825964e-03 -9.951711e-03 -1.045165e-02 -3.944013e-02 1.495623e-03
## [71] -6.102772e-03 3.173990e-03 -2.440380e-03 -2.247199e-03 -1.524607e-02
## [76] -4.533009e-03 -1.000656e-02 -5.745793e-03 1.519587e-03 3.547392e-03
## [81] -2.485419e-03 -4.186732e-03 -1.078829e-02 3.244309e-03 1.048790e-02
## [86] -4.386930e-03 -6.437752e-03 3.579397e-03 1.007811e-03 -4.489523e-03
## [91] -4.539783e-03 -6.997813e-03 -2.614985e-03 -6.868797e-04 -1.450890e-03
## [96] -3.892101e-03 -1.547749e-03 -2.994660e-03 5.838588e-03 3.284515e-03
## [101] -8.225706e-03 7.060214e-03 -1.542901e-03 -2.672595e-03 6.422571e-03
## [106] 2.349912e-03 6.003221e-03 4.639297e-03 4.742350e-03 -2.910855e-04
## [111] 5.048694e-03 1.076669e-02 1.620161e-02 -4.284476e-03 9.717002e-03
## [116] 4.800781e-03 1.674671e-03 -1.258471e-02 4.489182e-03 1.650284e-02
## [121] 9.279507e-03 1.277572e-03 3.474781e-03 -1.412238e-02 1.769164e-02
## [126] 1.399145e-03 1.827950e-02 -3.934242e-03 -5.481609e-03 1.484647e-02
## [131] -6.194814e-03 -6.680009e-03 -2.364427e-03 1.908099e-03
##
## $wnv
## [1] 0.0006868975
##
## $se
## [1] 0.02620873 0.02809009 0.02985312 0.03151768 0.03309863 0.03460744
## [7] 0.03605316 0.03744310 0.03878326 0.04007863 0.04133343 0.04255124
## [13] 0.04373515 0.04488785 0.04601168 0.04710870 0.04818076 0.04922947
## [19] 0.05025631 0.05126258 0.05224947 0.05321807 0.05416935 0.05510421
## [25] 0.05602347 0.05692789 0.05781817 0.05869494 0.05955881 0.06041032
## [31] 0.06125000 0.06207832 0.06289573 0.06370266 0.06449949 0.06528660
## [37] 0.06606433 0.06683301 0.06759294 0.06834443
##
## $psi
## [1] 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425
## [8] 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425
## [15] 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425
## [22] 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425
## [29] 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425
## [36] 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425
##
## $ptot
## [1] 1
##
## $phitot
## [1] 1
#very very straight line, not good!
# calculating ASE
f_dposrater_d1<-fore.aruma.wge(dposrater,d=1, phi = estd1bic$phi,theta = estd1bic$theta, n.ahead = 20, lastn= T, limits=F )
ase_dposrater_d1_aic = mean((f_dposrater_d1$f - dposrater[length(f_dposrater_d1$f)-20+1:length(f_dposrater_d1$f)])^2)
ase_dposrater_d1_aic
## [1] 0.01633175
# trial run, late decide to run data from May 03
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# try to work on positive count, rather than rate
plotts.sample.wge(dposr)
## $autplt
## [1] 1.00000000 0.94030333 0.87704100 0.82310353 0.78139908 0.74729768
## [7] 0.71461761 0.67511182 0.60755356 0.54009064 0.48457571 0.44771154
## [13] 0.41170553 0.37127579 0.32238768 0.25466332 0.19187584 0.14685491
## [19] 0.10759282 0.07496058 0.04743082 0.01547334 -0.02913890 -0.07022287
## [25] -0.09574474 -0.10917801
##
## $freq
## [1] 0.007462687 0.014925373 0.022388060 0.029850746 0.037313433 0.044776119
## [7] 0.052238806 0.059701493 0.067164179 0.074626866 0.082089552 0.089552239
## [13] 0.097014925 0.104477612 0.111940299 0.119402985 0.126865672 0.134328358
## [19] 0.141791045 0.149253731 0.156716418 0.164179104 0.171641791 0.179104478
## [25] 0.186567164 0.194029851 0.201492537 0.208955224 0.216417910 0.223880597
## [31] 0.231343284 0.238805970 0.246268657 0.253731343 0.261194030 0.268656716
## [37] 0.276119403 0.283582090 0.291044776 0.298507463 0.305970149 0.313432836
## [43] 0.320895522 0.328358209 0.335820896 0.343283582 0.350746269 0.358208955
## [49] 0.365671642 0.373134328 0.380597015 0.388059701 0.395522388 0.402985075
## [55] 0.410447761 0.417910448 0.425373134 0.432835821 0.440298507 0.447761194
## [61] 0.455223881 0.462686567 0.470149254 0.477611940 0.485074627 0.492537313
## [67] 0.500000000
##
## $db
## [1] 4.9424430 14.1064877 12.1363565 8.0575206 2.2824269 0.6438849
## [7] 2.2390356 0.1335972 -0.9670253 0.1923986 -2.8709965 -2.5687599
## [13] -2.7559601 -5.6278100 -5.0698519 -9.6786254 -8.5964012 -6.3423115
## [19] -3.3068006 -1.6718687 -2.9718973 -4.0934334 -8.1039003 -7.0961801
## [25] -7.2102604 -11.7152365 -10.0702766 -10.2333089 -13.7776917 -9.9888756
## [31] -10.0537597 -9.9494512 -8.7717003 -9.6351943 -11.8016155 -14.1931335
## [37] -9.0845318 -5.9426752 -9.5996526 -11.6511747 -19.7512049 -10.7586641
## [43] -14.1772539 -19.4372777 -13.1999421 -11.4926902 -10.1252570 -13.8628534
## [49] -18.0053337 -11.1500199 -15.1965479 -17.1489966 -17.6979597 -11.8316771
## [55] -12.1531021 -11.4287974 -12.2683953 -11.3614425 -10.6478686 -14.5484499
## [61] -12.6329743 -16.3061089 -11.5235513 -19.1052360 -12.6822944 -20.7582976
## [67] -23.4465724
##
## $dbz
## [1] 10.83907999 10.40847146 9.68852600 8.67732615 7.37603935
## [6] 5.79525151 3.96744682 1.96840355 -0.05783602 -1.90328744
## [11] -3.38061144 -4.43927046 -5.15017307 -5.58206215 -5.75586721
## [16] -5.70322187 -5.51462375 -5.31511206 -5.21494784 -5.28702279
## [21] -5.56976984 -6.07657283 -6.80095441 -7.71458905 -8.75880357
## [26] -9.83516604 -10.81114406 -11.56132640 -12.03055973 -12.25441514
## [31] -12.31134349 -12.26725653 -12.16136809 -12.02201018 -11.88332370
## [36] -11.78833610 -11.78109525 -11.89659336 -12.15330767 -12.54802407
## [41] -13.05148352 -13.60651129 -14.13566281 -14.56620172 -14.86514574
## [46] -15.05531739 -15.19338515 -15.32913346 -15.47637632 -15.60711387
## [51] -15.66805360 -15.61381648 -15.43877115 -15.18317638 -14.91121342
## [56] -14.68355173 -14.54255473 -14.51040219 -14.59297578 -14.78423541
## [61] -15.06901729 -15.42393088 -15.81680534 -16.20587116 -16.54099161
## [66] -16.77004861 -16.85172798
aic5.wge(dposr,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 4 2
## Error in aic calculation at 4 3
## Error in aic calculation at 5 0
## Error in aic calculation at 6 0
## Error in aic calculation at 6 1
## Error in aic calculation at 6 2
## Error in aic calculation at 6 4
## Error in aic calculation at 7 0
## Error in aic calculation at 7 2
## Error in aic calculation at 7 3
## Error in aic calculation at 7 4
## Five Smallest Values of aic
## p q aic
## 43 8 2 15.68195
## 47 9 1 15.68653
## 51 10 0 15.69133
## 42 8 1 15.69172
## 46 9 0 15.69267
# pick 8,2, better AIC 15.68 on count, vs. -7.4 on rate
aic5.wge(dposr,p=0:10,q=0:4, type='bic')
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 4 2
## Error in aic calculation at 4 3
## Error in aic calculation at 5 0
## Error in aic calculation at 6 0
## Error in aic calculation at 6 1
## Error in aic calculation at 6 2
## Error in aic calculation at 6 4
## Error in aic calculation at 7 0
## Error in aic calculation at 7 2
## Error in aic calculation at 7 3
## Error in aic calculation at 7 4
## Five Smallest Values of bic
## p q bic
## 42 8 1 15.90798
## 46 9 0 15.90892
## 43 8 2 15.91983
## 47 9 1 15.92442
## 51 10 0 15.92921
# pick 8, 1, not much different
# estimate model phi and theta
estaic_ct <- est.arma.wge(dposr, p=8, q=2, factor = T)
##
## Coefficients of Original polynomial:
## 1.8021 -1.1264 0.2207 0.2179 -0.2538 0.5316 -0.2885 -0.1119
##
## Factor Roots Abs Recip System Freq
## 1-1.9615B+0.9641B^2 1.0173+-0.0483i 0.9819 0.0076
## 1-1.2266B+0.9629B^2 0.6369+-0.7955i 0.9813 0.1426
## 1+0.3863B+0.6365B^2 -0.3034+-1.2161i 0.7978 0.2889
## 1+0.7458B -1.3409 0.7458 0.5000
## 1+0.2539B -3.9389 0.2539 0.5000
##
##
f_dposr_ct <- fore.aruma.wge(dposr, phi = estaic_ct$phi,theta = estaic_ct$theta, n.ahead = 20, lastn= T, limits=F )
# ASE
ase_dposr_ct = mean((f_dposr_ct$f - dposr[length(f_dposr_ct$f)-20+1:length(f_dposr_ct$f)])^2)
ase_dposr_ct
## [1] 1949770638
# ASE 1949770638, too large, try to take a diff
# complete the model, test residual
ljung.wge(f_dposr_ct$res,p=8,q=2)
## Obs -0.01394181 -0.01704525 -0.01386465 -0.01703005 0.005111981 -0.009188232 -0.02885949 0.004736653 -0.0180852 0.04396489 -0.03633083 -0.03272244 0.0008131524 0.06869748 -0.005879667 -0.03381658 0.01973066 0.07871697 -0.1226284 0.0004623474 0.01575949 0.03214822 -0.03849347 -0.009459984
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 5.742086
##
## $df
## [1] 14
##
## $pval
## [1] 0.9725793
# pval = 0.9725793
plotts.wge(f_dposr_ct$res)
# forecast out
f_dposr_ct_7 <- fore.aruma.wge(dposr, phi = estaic_ct$phi,theta = estaic_ct$theta, n.ahead = 7, lastn= F, limits=T )
f_dposr_ct_90 <- fore.aruma.wge(dposr, phi = estaic_ct$phi,theta = estaic_ct$theta, n.ahead = 90, lastn= F, limits=T )
# trial run, late decide to run data from May 03
# get the test positive rate
# us4 dataset contains all variables needed for both Uni and multi variant analysis, already sequence reversed, older one first.
dposrater_may = us4$posrate
plotts.sample.wge(dposrater_may)
## $autplt
## [1] 1.000000000 0.793243460 0.775540019 0.685701466 0.657283710
## [6] 0.570114234 0.551260793 0.463950606 0.455998446 0.371238640
## [11] 0.323622942 0.243563452 0.204482178 0.152947322 0.142972844
## [16] 0.061880862 -0.009661695 -0.005742598 -0.040030206 -0.120922069
## [21] -0.146085583 -0.172481197 -0.216397590 -0.237802101 -0.300278105
## [26] -0.320390970
##
## $freq
## [1] 0.01369863 0.02739726 0.04109589 0.05479452 0.06849315 0.08219178
## [7] 0.09589041 0.10958904 0.12328767 0.13698630 0.15068493 0.16438356
## [13] 0.17808219 0.19178082 0.20547945 0.21917808 0.23287671 0.24657534
## [19] 0.26027397 0.27397260 0.28767123 0.30136986 0.31506849 0.32876712
## [25] 0.34246575 0.35616438 0.36986301 0.38356164 0.39726027 0.41095890
## [31] 0.42465753 0.43835616 0.45205479 0.46575342 0.47945205 0.49315068
##
## $db
## [1] 14.6628314 -1.6736907 -0.9266359 -13.5179982 -14.7748931 -4.7699180
## [7] -7.9912762 -7.2133867 -2.4763669 -8.3631330 -5.1740948 -6.4803604
## [13] -21.5733796 -10.0005043 -11.1183279 -1.7859093 -29.3217186 -15.6758582
## [19] -12.8292962 -16.5949786 -3.3162462 -14.9177477 -13.9091540 -10.4959331
## [25] -6.7994221 -9.8619573 -11.6744305 -8.6133321 -4.2755790 -9.9373453
## [31] -9.8738467 -7.8602514 -12.7419835 -8.2505300 -14.8437007 -1.6814680
##
## $dbz
## [1] 8.9698701 8.1680800 6.8363320 5.0009416 2.7544483 0.3412567
## [7] -1.7935859 -3.2673252 -4.1706963 -4.8589492 -5.5349957 -6.1993758
## [13] -6.7535150 -7.1066131 -7.2645159 -7.3334652 -7.4303057 -7.6018727
## [19] -7.8158662 -8.0046486 -8.1150614 -8.1261203 -8.0434268 -7.9020723
## [25] -7.7696559 -7.7228794 -7.8076870 -8.0090148 -8.2362330 -8.3257131
## [31] -8.0933503 -7.4650627 -6.5695468 -5.6529713 -4.9335321 -4.5446295
# straight to ARMA
aic5.wge(dposrater_may,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 38 7 2 -0.3104626
## 52 10 1 -0.3080242
## 13 2 2 -0.2761237
## 51 10 0 -0.2540448
## 18 3 2 -0.2493179
# pick 7,2, AIC -9.5 vs. -7.4 on mar data start
aic5.wge(dposrater_may,p=0:10,q=0:4, type='bic')
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of bic
## p q bic
## 7 1 1 -0.1463971
## 11 2 0 -0.1393156
## 13 2 2 -0.1192430
## 8 1 2 -0.1034945
## 12 2 1 -0.1000530
# pick 1, 1, BIC=-9.3567
# estimate aic pick model phi and theta
estaic_may <- est.arma.wge(dposrater_may, p=7, q=2, factor = T)
##
## Coefficients of Original polynomial:
## 0.6672 0.7216 -0.2318 -0.0038 0.0350 0.0791 -0.2982
##
## Factor Roots Abs Recip System Freq
## 1-1.9829B+0.9894B^2 1.0021+-0.0810i 0.9947 0.0128
## 1+0.9249B -1.0812 0.9249 0.5000
## 1+0.9977B+0.5764B^2 -0.8654+-0.9929i 0.7592 0.3641
## 1-0.6068B+0.5654B^2 0.5366+-1.2168i 0.7519 0.1839
##
##
f_dposr_may <- fore.aruma.wge(dposrater_may, phi = estaic_may$phi,theta = estaic_may$theta, n.ahead = 20, lastn= T, limits=F )
# ASE
ase_dposrater_may = mean((f_dposr_may$f - dposrater_may[(length(dposrater_may)-20+1):length(dposrater_may)])^2)
ase_dposrater_may
## [1] 3.689831
# ASE 3.689831
# complete the model, test residual
ljung.wge(f_dposr_may$res,p=7,q=2)
## Obs -0.01046769 -0.03701726 0.02093265 0.004374793 0.08179569 -0.03834407 0.1781997 0.03366438 -0.0241363 0.001220277 -0.1151678 -0.1432698 0.01303198 0.2018446 0.02189474 -0.2666775 0.0101408 0.04010955 -0.1782199 -0.06381621 0.04755497 -0.04394528 0.06636473 -0.04428351
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 22.23227
##
## $df
## [1] 15
##
## $pval
## [1] 0.1018566
# pval = 0.1018566
plotts.wge(f_dposr_may$res)
# forecast out
f_dposr_may_7 <- fore.aruma.wge(dposrater_may, phi = estaic_may$phi,theta = estaic_may$theta, n.ahead = 7, lastn= F, limits=T )
f_dposr_may_90 <- fore.aruma.wge(dposrater_may, phi = estaic_may$phi,theta = estaic_may$theta, n.ahead = 90, lastn= F, limits=T )
#not too happy, but does do pretty good job on long term, model of choice for us long term without MLP
us5 = ts(us4$posrate)
# rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()
MeanHolder = numeric()
for( i in 1:(73-(trainingSize + horizon) + 1))
{
forecasts_aruma = fore.aruma.wge(dposrater_may[i:(i+(trainingSize-1))], d=0, phi = estaic_may$phi,theta = estaic_may$theta, n.ahead = horizon)
ASE = mean((us5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts_aruma$f)^2)
ASEHolder[i] = ASE
MeanHolder[i] = forecasts_aruma$f
}
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length
ASEHolder # taking 10 mins to run
## [1] 5.425324 29.145427 12.978260 3.377284 2.265916 1.044848 1.040106
## [8] 5.505741 9.146799 7.417582 8.226351 2.577548 1.385351 2.279227
## [15] 2.751509 5.012060 1.180703
MeanHolder
## [1] 4.881010 3.573048 4.848994 5.986529 6.222015 7.134091 7.048535 6.286977
## [9] 5.695387 6.909940 6.506387 7.289995 7.659611 7.781025 7.727248 7.554135
## [17] 8.108002
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.040 2.266 3.377 5.927 7.418 29.145
WindowedASE #5.927061
## [1] 5.927061
# try diff once
dposrater_may_d1 <- artrans.wge(dposrater_may,phi.tr = 1)
aic5.wge(dposrater_may_d1,p=0:15,q=0:4)
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 4 2
## Error in aic calculation at 15 0
## Error in aic calculation at 15 1
## Five Smallest Values of aic
## p q aic
## 73 14 2 -0.6180425
## 78 15 2 -0.6028138
## 79 15 3 -0.4969102
## 68 13 2 -0.4967744
## 66 13 0 -0.3751314
# pick 14, 2, AIC=-9.8, tiny larger. not worth the effort of extra degrees
# estimate d1 pick model phi and theta
estaic_may_d1 <- est.arma.wge(dposrater_may_d1, p=14, q=2, factor = T)
##
## Coefficients of Original polynomial:
## -1.3939 -1.4922 -0.9023 -0.3904 0.0533 0.5522 0.8376 1.1217 0.9843 0.8481 0.2600 -0.1777 -0.6017 -0.3647
##
## Factor Roots Abs Recip System Freq
## 1+1.0904B+0.9744B^2 -0.5595+-0.8445i 0.9871 0.3431
## 1+0.4407B+0.9620B^2 -0.2291+-0.9935i 0.9808 0.2861
## 1-1.2546B+0.9616B^2 0.6524+-0.7838i 0.9806 0.1395
## 1-0.4112B+0.9281B^2 0.2215+-1.0141i 0.9634 0.2158
## 1+0.9617B -1.0398 0.9617 0.5000
## 1+1.5692B+0.9156B^2 -0.8569+-0.5982i 0.9569 0.4030
## 1-0.9180B 1.0893 0.9180 0.0000
## 1-0.7777B 1.2859 0.7777 0.0000
## 1+0.6934B -1.4421 0.6934 0.5000
##
##
f_dposr_may_d1 <- fore.aruma.wge(dposrater_may, d=1,phi = estaic_may_d1$phi,theta = estaic_may_d1$theta, n.ahead = 20, lastn= T, limits=F )
# looking good
# ASE
ase_dposrater_may_d1 = mean((f_dposr_may_d1$f - dposrater_may[(length(dposrater_may)-20+1):length(dposrater_may)])^2)
ase_dposrater_may_d1
## [1] 0.4512103
# ASE 0.4512103
# complete the model, test residual
ljung.wge(f_dposr_may_d1$res,p=14,q=2)
## Obs -0.06181709 -0.1193952 0.04077549 -0.02389429 -0.02705438 -0.01684004 -0.01841777 -0.1925159 -0.01702458 0.03846218 -0.1202376 0.1332637 0.03500123 0.002342406 0.1593079 0.09893685 -0.07997416 -0.1485467 -0.04379409 -0.03564445 -0.09591164 -0.1030234 0.1236619 -0.01867997
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 18.23846
##
## $df
## [1] 8
##
## $pval
## [1] 0.01950805
# pval = 0.01950805, not white noise
plotts.wge(f_dposr_may_d1$res)
# forecast out
f_dposr_may_d1_7 <- fore.aruma.wge(dposrater_may, d=1, phi = estaic_may_d1$phi,theta = estaic_may_d1$theta, n.ahead = 7, lastn= F, limits=T )
f_dposr_may_d1_90 <- fore.aruma.wge(dposrater_may, d=1, phi = estaic_may_d1$phi,theta = estaic_may_d1$theta, n.ahead = 90, lastn= F, limits=T )
#short term looks good, but not long term.
# chosen for short term us positive rate forecast, calculate rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()
for( i in 1:(73-(trainingSize + horizon) + 1))
{
forecasts = fore.aruma.wge(dposrater_may[i:(i+(trainingSize-1))], d=1, phi = estaic_may_d1$phi,theta = estaic_may_d1$theta, n.ahead = horizon)
ASE = mean((us5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$f)^2)
ASEHolder[i] = ASE
}
ASEHolder # taking 10 mins to run
## [1] 0.5696323 0.5377582 1.0019927 0.4707286 0.5493673 0.9692764 0.3692563
## [8] 0.9992082 1.1433232 0.8044940 0.6157031 0.5826998 0.6736881 1.0265688
## [15] 0.2698419 0.4965949 1.0957465
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2698 0.5378 0.6157 0.7162 0.9992 1.1433
WindowedASE #0.7162283
## [1] 0.7162283
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# try to see if diff make sense on count data
dposr_d1 <- artrans.wge(dposr, phi.tr = 1)
# looks good, now near 0 frequency is gone, higher frequency shows up. ACF show cyclic behavior
aic5.wge(dposr_d1,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 3 2
## Error in aic calculation at 3 4
## Error in aic calculation at 4 3
## Error in aic calculation at 4 4
## Error in aic calculation at 5 2
## Error in aic calculation at 5 3
## Error in aic calculation at 5 4
## Error in aic calculation at 7 4
## Error in aic calculation at 8 3
## Error in aic calculation at 9 3
## Error in aic calculation at 10 2
## Five Smallest Values of aic
## p q aic
## 33 6 2 15.66430
## 38 7 2 15.67852
## 34 6 3 15.67977
## 39 7 3 15.68838
## 41 8 0 15.68977
# pick 6,2 AIC 15.66, not changing much
estaic_d1_ct <- est.arma.wge(dposr_d1, p=6, q=2, factor = T)
##
## Coefficients of Original polynomial:
## 0.9223 -0.4204 -0.1074 0.1150 -0.1663 0.4261
##
## Factor Roots Abs Recip System Freq
## 1-1.2326B+0.9650B^2 0.6387+-0.7927i 0.9823 0.1421
## 1-0.9140B 1.0941 0.9140 0.0000
## 1+0.7878B -1.2693 0.7878 0.5000
## 1+0.4365B+0.6132B^2 -0.3560+-1.2264i 0.7831 0.2950
##
##
f_dposr_d1_ct <- fore.aruma.wge(dposr, d=1,phi = estaic_d1_ct$phi,theta = estaic_d1_ct$theta, n.ahead = 20, lastn= T, limits=F )
# ASE
ase_dposr_d1_ct = mean((f_dposr_d1_ct$f - dposr[length(f_dposr_d1_ct$f)-20+1:length(f_dposr_d1_ct$f)])^2)
ase_dposr_d1_ct
## [1] 1860690299
# 1860690299
# residual white noise test
ljung.wge(f_dposr_d1_ct$res,p=6,q=2)
## Obs -0.01040152 -0.0005907605 0.0005360688 -0.009483016 0.02118629 -0.01015082 0.01665095 0.03162182 -0.002579604 0.06602495 -0.0182375 -0.0342344 0.004088701 0.06722949 -0.003539669 -0.03814393 0.01140429 0.07346267 -0.1307832 -0.007597847 -0.001230379 0.02043867 -0.05278242 -0.02973383
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 6.331486
##
## $df
## [1] 16
##
## $pval
## [1] 0.9841029
# pval = 0.9841029, can't reject Ho which is white noise
# plot the residual
plotts.wge(f_dposr_d1_ct$res)
#forecast out
f_dposr_d1_ct_7 <- fore.aruma.wge(dposr, d=1,phi = estaic_d1_ct$phi,theta = estaic_d1_ct$theta, n.ahead = 7, lastn= F, limits=T )
f_dposr_d1_ct_90 <- fore.aruma.wge(dposr, d=1,phi = estaic_d1_ct$phi,theta = estaic_d1_ct$theta, n.ahead = 90, lastn= F, limits=T )
# give up on count analysis, project will focus on positive rate
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# try a 7 days cycle on count data
dposr_s7 <- artrans.wge(dposr, phi.tr = c(0,0,0,0,0,0,1))
# ACF damping fast
aic5.wge(dposr_s7,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 2 2
## Error in aic calculation at 2 3
## Error in aic calculation at 3 2
## Error in aic calculation at 4 1
## Error in aic calculation at 4 3
## Error in aic calculation at 4 4
## Error in aic calculation at 5 1
## Error in aic calculation at 5 2
## Error in aic calculation at 5 3
## Error in aic calculation at 5 4
## Error in aic calculation at 6 3
## Error in aic calculation at 7 4
## Error in aic calculation at 8 4
## Five Smallest Values of aic
## p q aic
## 41 8 0 15.82318
## 37 7 1 15.83336
## 46 9 0 15.83697
## 39 7 3 15.83701
## 42 8 1 15.83741
# pick 8,0 AIC 15.82, not changing much
estaic_s7_ct <- est.arma.wge(dposr_s7, p=8, q=0, factor = T)
##
## Coefficients of Original polynomial:
## 0.6576 0.1678 0.0280 0.1679 -0.0174 0.1951 -0.5637 0.2718
##
## Factor Roots Abs Recip System Freq
## 1+0.9658B -1.0354 0.9658 0.5000
## 1-0.9181B 1.0892 0.9181 0.0000
## 1+1.0820B+0.8127B^2 -0.6656+-0.8873i 0.9015 0.3524
## 1-0.4572B+0.7852B^2 0.2911+-1.0903i 0.8861 0.2085
## 1-1.3300B+0.4803B^2 1.3846+-0.4062i 0.6930 0.0454
##
##
f_dposr_s7_ct <- fore.aruma.wge(dposr, s=7,phi = estaic_s7_ct$phi,theta = estaic_s7_ct$theta, n.ahead = 20, lastn= T, limits=F )
#still not very good, much lower than original data
#ASE
ase_dposr_s7_ct = mean((f_dposr_s7_ct$f - dposr[length(f_dposr_s7_ct$f)-20+1:length(f_dposr_s7_ct$f)])^2)
ase_dposr_s7_ct
## [1] 1878125434
# 1878125434, not changing much
# residual white noise test
ljung.wge(f_dposr_s7_ct$res,p=8,q=0)
## Obs 0.01375532 -0.0008986191 -0.006465254 -0.02174787 0.02371531 0.04285527 -0.09327852 0.07453255 0.02729956 0.01647333 -0.02750212 0.03522911 0.08293654 -0.1403013 0.01589859 0.0296336 0.002374378 0.05289711 -0.0823588 -0.02831454 -0.02719726 -0.0117039 -0.03319747 0.002022641
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 9.103533
##
## $df
## [1] 16
##
## $pval
## [1] 0.9090886
# pval = 0.9090886, lower, still good. can't reject Ho which is white noise
# plot the residual
plotts.wge(f_dposr_s7_ct$res)
#forecast out
f_dposr_s7_ct_7 <- fore.aruma.wge(dposr, s=7,phi = estaic_s7_ct$phi,theta = estaic_s7_ct$theta, n.ahead = 7, lastn= F, limits=T )
f_dposr_s7_ct_90 <- fore.aruma.wge(dposr, s=7,phi = estaic_s7_ct$phi,theta = estaic_s7_ct$theta, n.ahead = 90, lastn= F, limits=T )
#################################### VAR, MLP, Rolling ASE for US ########################
#us3
#us2$hospitalizedCurrently 0.722
#us2$deathIncrease 0.24
#us2$inIcuCurrently 0.205
# we'll move forward for multi variants analysis factor these 3 variables
ccf(us4$posrate,us4$hospitalized)
ccf(us4$posrate,us4$deathIncrease)
ccf(us4$posrate,us4$inIcu)
VARselect(us4,type='both',lag.max=16) #7, many Infs
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 14 14 14 14
##
## $criteria
## 1 2 3 4 5
## AIC(n) 3.393755e+01 3.381777e+01 3.395491e+01 3.391049e+01 3.398697e+01
## HQ(n) 3.427186e+01 3.437496e+01 3.473498e+01 3.491344e+01 3.521280e+01
## SC(n) 3.479778e+01 3.525149e+01 3.596211e+01 3.649119e+01 3.714116e+01
## FPE(n) 5.498560e+14 4.934478e+14 5.810818e+14 5.833917e+14 6.817288e+14
## 6 7 8 9 10
## AIC(n) 3.379562e+01 3.376441e+01 3.379794e+01 3.335921e+01 3.242349e+01
## HQ(n) 3.524432e+01 3.543599e+01 3.569239e+01 3.547654e+01 3.476370e+01
## SC(n) 3.752329e+01 3.806557e+01 3.867259e+01 3.880735e+01 3.844512e+01
## FPE(n) 6.358035e+14 7.374125e+14 9.887555e+14 9.276070e+14 6.301837e+14
## 11 12 13 14 15 16
## AIC(n) 3.104646e+01 2.819272e+01 -13.162250113 -Inf -Inf -Inf
## HQ(n) 3.360954e+01 3.097869e+01 -10.153410500 -Inf -Inf -Inf
## SC(n) 3.764157e+01 3.536132e+01 -5.420161098 -Inf -Inf -Inf
## FPE(n) 3.674662e+14 8.578404e+13 0.001840854 0 0 0
lsfit_us4=VAR(us4, p=8, type ='both')###############
# short term forecast VAR US
preds_us4=predict(lsfit_us4,n.ahead=7)
preds_us4$fcst$posrate[1:7,1]
## [1] 10.177700 11.510468 7.554877 9.033158 7.683985 7.470714 7.032057
#fanchart(preds_us4)
# making graph for 8 ahead prediction
#us4_predgraph = c(us4$posrate,rep('na',7))
plot(us4$posrate, type="o",xlim=c(1,81),ylim=c(-6,15))
points(seq(74,80,1),preds_us4$fcst$posrate[1:7,1], type='o',col="blue")
# long term forecast VAR US
preds_us4L=predict(lsfit_us4,n.ahead=90)
preds_us4L$fcst$posrate[1:90,1]
## [1] 10.1777004 11.5104676 7.5548772 9.0331583 7.6839851
## [6] 7.4707137 7.0320572 7.1980868 9.1008139 9.4804441
## [11] 8.2259180 8.1660903 6.0949405 4.1022494 3.2807808
## [16] 1.6878357 3.8696523 4.6755316 5.1433971 5.4077484
## [21] 2.8775605 -0.5807645 -4.2229234 -7.4240941 -6.8022557
## [26] -5.4083456 -3.1302017 -0.6095270 -2.2620118 -6.2219996
## [31] -13.1515617 -20.3006638 -23.5869966 -23.7022419 -19.7192221
## [36] -13.5690574 -10.6946722 -12.3164181 -20.7368168 -33.2876307
## [41] -44.1329109 -50.5899560 -48.1919268 -38.3716289 -27.1388097
## [46] -20.0782161 -24.0957446 -39.5683759 -60.9272607 -80.9718260
## [51] -89.0429999 -81.0711788 -60.6410955 -36.8058252 -24.1992224
## [56] -31.7396542 -59.9117578 -99.8513103 -132.8910845 -142.9411182
## [61] -122.8899937 -79.1785383 -33.4728716 -9.7525900 -25.1846549
## [66] -79.9259122 -151.8654693 -208.3298847 -218.7448997 -171.3330278
## [71] -83.9062392 4.0312521 45.6383331 10.7470656 -94.2740127
## [76] -227.1778047 -323.6129935 -328.2104501 -225.6841714 -52.4039578
## [81] 111.9007323 180.1599079 102.8339932 -102.8005632 -348.9702796
## [86] -513.5071046 -498.3468431 -282.2752258 53.8937062 355.1147335
# making graph for 90 ahead prediction
plot(us4$posrate, type="o",xlim=c(1,164),ylim=c(-20,15))
points(seq(74,163,1),preds_us4L$fcst$posrate[1:90,1], type='o',col="blue")
# looks only following the upwards trend, not good.
#try to put few variables in exogen instead######################################################
us4_var1 = data.frame(hospitalized=us4$hospitalized,posrate=us4$posrate)
us4_var_exo1 = data.frame(deathIncrease=us4$deathIncrease,inIcu=us4$inIcu)
# exclude deathIncrease, forecast inlcude too many negative value
us4_var_exo2 = data.frame(inIcu=us4$inIcu)
VARselect(us4_var1,type='both',exogen=us4_var_exo2)
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 2 2 2 2
##
## $criteria
## 1 2 3 4 5
## AIC(n) 12.99685 12.84292 12.89945 12.96475 12.96783
## HQ(n) 13.13065 13.03023 13.14028 13.25910 13.31570
## SC(n) 13.33703 13.31917 13.51177 13.71315 13.85230
## FPE(n) 441317.45556 378798.13571 401669.20174 430189.38883 433589.89452
## 6 7 8 9 10
## AIC(n) 12.84663 12.90337 12.94491 13.00587 12.99910
## HQ(n) 13.24801 13.35827 13.45333 13.56780 13.61456
## SC(n) 13.86717 14.05998 14.23760 14.43462 14.56393
## FPE(n) 386645.57666 412824.56086 435227.45547 469219.78481 474360.40778
# AIC 2, SC 2
lsfit_us4_var1=VAR(us4_var1, p=2, type ='both',exogen=us4_var_exo2)
# short term forecast VAR1 US
#dummy exogen for forecast, we did forecast using univariant MLP for all variables
hospitalized = ts(us4$hospitalized)
fitmlp_hospitalized7=mlp(hospitalized,rep=30)
f_fitmlp_hospitalized7=forecast(fitmlp_hospitalized7,h=7)
plot(f_fitmlp_hospitalized7)
hospitalized7 = ts(c(us4$hospitalized, f_fitmlp_hospitalized7$mean))
deathIncrease = ts(us4$deathIncrease)
fitmlp_deathIncrease7=mlp(deathIncrease,rep=30)
f_fitmlp_deathIncrease7=forecast(fitmlp_deathIncrease7,h=7)
plot(f_fitmlp_deathIncrease7)
deathIncrease7 = ts(c(us4$deathIncrease, f_fitmlp_deathIncrease7$mean))# contain negative value in forecast, make no sense
inIcu = ts(us4$inIcu)
fitmlp_inIcu7=mlp(inIcu,rep=30)
f_fitmlp_inIcu7=forecast(fitmlp_inIcu7,h=7)
plot(f_fitmlp_inIcu7)
inIcu7 = ts(c(us4$inIcu, f_fitmlp_inIcu7$mean))
us6_var1_dummy1 = data.frame(inIcu=f_fitmlp_inIcu7$mean,deathIncrease=f_fitmlp_deathIncrease7$mean)
us6_var1_dummy2 = data.frame(inIcu=f_fitmlp_inIcu7$mean)
preds_us4_var1=predict(lsfit_us4_var1,n.ahead=7,dumvar= us6_var1_dummy2)
preds_us4_var1$fcst$posrate[1:7,1]
## [1] 8.307927 8.802843 8.856643 8.916684 8.913726 8.839815 8.781388
# all negative forecast either with or without deathIncrease in exogen, doesn't make sense. try to not use any exogen
VARselect(us4_var1,type='both')
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 6 6 2 6
##
## $criteria
## 1 2 3 4 5
## AIC(n) 13.41537 13.19125 13.30293 13.26517 13.32024
## HQ(n) 13.52240 13.35181 13.51700 13.53276 13.64135
## SC(n) 13.68751 13.59947 13.84722 13.94553 14.13668
## FPE(n) 670451.96151 536279.38260 600603.14043 579883.51451 615176.14008
## 6 7 8 9 10
## AIC(n) 12.96369 13.01147 13.04878 13.11047 13.11324
## HQ(n) 13.33832 13.43961 13.53044 13.64565 13.70194
## SC(n) 13.91619 14.10005 14.27343 14.47119 14.61003
## FPE(n) 433120.76486 457805.29001 479976.81921 517059.87816 526798.63612
# AIC 6, SC 2
lsfit_us4_var1=VAR(us4_var1, p=6, type ='both')
# short term forecast VAR1 US
preds_us4_var1=predict(lsfit_us4_var1,n.ahead=7)
preds_us4_var1$fcst$posrate[1:7,1]
## [1] 6.806429 8.781841 8.257319 8.120434 7.739955 7.970919 6.653913
# making graph for 7 ahead prediction
plot(us4_var1$posrate, type="o",xlim=c(1,81),ylim=c(1,15))
points(seq(74,80,1),preds_us4_var1$fcst$posrate[1:7,1], type='o',col="blue")
# long term forecast VAR US
preds_us4L_var1=predict(lsfit_us4_var1,n.ahead=90)
preds_us4L_var1$fcst$posrate[1:90,1]
## [1] 6.8064291 8.7818408 8.2573192 8.1204337 7.7399552
## [6] 7.9709193 6.6539134 6.6929143 6.5985146 6.3767170
## [11] 6.0949278 5.8333064 5.3532196 4.5397360 4.0446575
## [16] 3.4206201 2.9813754 2.2690500 1.8499865 1.0324457
## [21] 0.2106176 -0.7451058 -1.5912829 -2.6459346 -3.5643894
## [26] -4.5273223 -5.6046888 -6.7824202 -8.0161039 -9.3494171
## [31] -10.7753136 -12.1705204 -13.6430699 -15.1568064 -16.7735223
## [36] -18.4436695 -20.2549121 -22.1254575 -24.0758632 -26.0719063
## [41] -28.1540137 -30.2941554 -32.5280900 -34.8658841 -37.3031521
## [46] -39.8290805 -42.4425486 -45.1435547 -47.9192300 -50.7931278
## [51] -53.7662611 -56.8487731 -60.0332772 -63.3291997 -66.7216918
## [56] -70.2147667 -73.8080892 -77.5108606 -81.3236668 -85.2539243
## [61] -89.3022331 -93.4663524 -97.7447216 -102.1377213 -106.6478035
## [66] -111.2766709 -116.0304294 -120.9104824 -125.9185104 -131.0531822
## [71] -136.3157336 -141.7050159 -147.2235124 -152.8734709 -158.6579681
## [76] -164.5782515 -170.6357650 -176.8306175 -183.1627205 -189.6327509
## [81] -196.2420398 -202.9923897 -209.8853220 -216.9224786 -224.1043462
## [86] -231.4313118 -238.9034546 -246.5214634 -254.2859770 -262.1982165
# making graph for 90 ahead prediction
plot(us4_var1$posrate, type="o",xlim=c(1,164),ylim=c(-5,11))
points(seq(74,163,1),preds_us4L_var1$fcst$posrate[1:90,1], type='o',col="blue")
# set negative value to 0, plot better graph for visualization
for (i in 1:length(preds_us4L_var1$fcst$posrate)) {
if (preds_us4L_var1$fcst$posrate[i]<0.0) preds_us4L_var1$fcst$posrate[i]=0.0
}
plot(us5, type = "l",xlim=c(1,163),ylim=c(-1,12))
lines(seq(74,163,1),preds_us4L_var1$fcst$posrate[1:90,1],col = "blue")
# Rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()
for( i in 1:(73-(trainingSize + horizon) + 1))
{
usp = us4_var1[i:(i+trainingSize),]
lsfit_us4_var1 = VAR(usp,p=6)
forecasts = predict(lsfit_us4_var1,n.ahead = horizon)
ASE = mean((us4_var1$posrate[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$fcst$posrate)^2)
ASEHolder[i] = ASE
}
ASEHolder
## [1] 9.055827 10.101641 10.687193 10.924849 12.169259 11.624482 11.228665
## [8] 12.814493 13.097033 13.808635 13.992837 14.199188 15.651549 15.768550
## [15] 15.306078 15.233905 14.224059
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.056 11.229 13.097 12.935 14.224 15.769
WindowedASE #12.9346
## [1] 12.9346
us5 = ts(us4$posrate)
library(nnfor)
set.seed(5)
fitmlp_usu=mlp(us5,rep=100)
# short term forecast MLP uni US
f_fitmlp_usu=forecast(fitmlp_usu,h=7)
plot(f_fitmlp_usu)
# long term forecasst MLP uni us
f_fitmlp_usuL=forecast(fitmlp_usu,h=90)
plot(f_fitmlp_usuL)
# graph better long term forecast with mean only
# there are some negative rate which does not make sense. normalize nagative to 0
for (i in 1:length(f_fitmlp_usuL$mean)) {
if (f_fitmlp_usuL$mean[i]<0.0) f_fitmlp_usuL$mean[i]=0.0
}
plot(us5, type = "l",xlim=c(1,163),ylim=c(-1,12))
lines(seq(74,163,1),f_fitmlp_usuL$mean,col = "blue")
# rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()
for( i in 1:(73-(trainingSize + horizon) + 1))
{
usp = ts(us5[i:(i+trainingSize)])
fitmlp_us5 = mlp(usp,rep=25)
forecasts = forecast(fitmlp_us5,h = horizon)
ASE = mean((us5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$mean)^2)
ASEHolder[i] = ASE
}
ASEHolder # taking 10 mins to run
## [1] 1.3385616 0.3610328 1.5497740 1.3087507 1.7626256 1.9780489 1.8837602
## [8] 2.6597329 1.3403879 1.6741456 2.1399284 1.1686850 2.8522299 0.5572741
## [15] 0.9798475 1.7451546 4.7060941
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.361 1.309 1.674 1.765 1.978 4.706
WindowedASE #1.765061
## [1] 1.765061
#short term
# choose ARIMA(14,1,2), no ensemble. MLP sequentially decreasing, so is also ARMA(7.2)
# long term
ensemble_usL_uni = (f_fitmlp_usuL$mean + f_dposr_may_d1_90$f + f_dposr_may_90$f)/3
#8.255756 10.020159 10.202144 11.270059 10.230565 10.281909 9.750516
plot(us5, type = "l",xlim=c(1,163),ylim=c(1,12))
lines(seq(74,163,1),ensemble_usL_uni,col = "blue")
set.seed(5)
# forecast regressor value in order to MLP posrate forecast into the future.. short term
hospitalized = ts(us4$hospitalized)
fitmlp_hospitalized7=mlp(hospitalized,rep=30)
f_fitmlp_hospitalized7=forecast(fitmlp_hospitalized7,h=7)
plot(f_fitmlp_hospitalized7)
hospitalized7 = ts(c(us4$hospitalized, f_fitmlp_hospitalized7$mean))
deathIncrease = ts(us4$deathIncrease)
fitmlp_deathIncrease7=mlp(deathIncrease,rep=30)
f_fitmlp_deathIncrease7=forecast(fitmlp_deathIncrease7,h=7)
plot(f_fitmlp_deathIncrease7)
deathIncrease7 = ts(c(us4$deathIncrease, f_fitmlp_deathIncrease7$mean))# contain negative value in forecast, make no sense
inIcu = ts(us4$inIcu)
fitmlp_inIcu7=mlp(inIcu,rep=30)
f_fitmlp_inIcu7=forecast(fitmlp_inIcu7,h=7)
plot(f_fitmlp_inIcu7)
inIcu7 = ts(c(us4$inIcu, f_fitmlp_inIcu7$mean))
# setup regressor df
us6 = data.frame(hospitalized=hospitalized7,deathIncrease=deathIncrease7,inIcu=inIcu7)
us6_mlp1 = data.frame(hospitalized=hospitalized7)
fitmlp_usm=mlp(us5,rep=30, xreg=us6_mlp1)
fitmlp_usm
## MLP fit with 5 hidden nodes and 30 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (1,2)
## Forecast combined using the median operator.
## MSE: 0.115.
f_fitmlp_usm=forecast(fitmlp_usm,h=7,xreg =us6_mlp1)
plot(f_fitmlp_usm)
# forecast regressor value in order to MLP posrate forecast into the future.. long term
#hospitalized = ts(us4$hospitalized)
#fitmlp_hospitalized7=mlp(hospitalized,rep=30)
f_fitmlp_hospitalized90=forecast(fitmlp_hospitalized7,h=90)
plot(f_fitmlp_hospitalized90)
hospitalized90 = ts(c(us4$hospitalized, f_fitmlp_hospitalized90$mean))
#deathIncrease = ts(us4$deathIncrease)
#fitmlp_deathIncrease90=mlp(deathIncrease,rep=30)
f_fitmlp_deathIncrease90=forecast(fitmlp_deathIncrease7,h=90)
plot(f_fitmlp_deathIncrease90)
deathIncrease90 = ts(c(us4$deathIncrease, f_fitmlp_deathIncrease90$mean)) # consider exclude this, too many negative values
#inIcu = ts(us4$inIcu)
#fitmlp_inIcu90=mlp(inIcu,rep=30)
f_fitmlp_inIcu90=forecast(fitmlp_inIcu7,h=90)
plot(f_fitmlp_inIcu90)
inIcu90 = ts(c(us4$inIcu, f_fitmlp_inIcu90$mean))
# setup regressor df
us6L = data.frame(hospitalized=hospitalized90,deathIncrease=deathIncrease90,inIcu=inIcu90)
us6L_mlp1 = data.frame(hospitalized=hospitalized90)
set.seed(5)
fitmlp_usmL=mlp(us5,rep=30, xreg=us6L_mlp1)
fitmlp_usmL
## MLP fit with 5 hidden nodes and 30 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (1,2)
## Forecast combined using the median operator.
## MSE: 0.1481.
f_fitmlp_usmL=forecast(fitmlp_usmL,h=90,xreg =us6L_mlp1)
plot(f_fitmlp_usmL)
# set negative value to 0, plot better graph for visilization
for (i in 1:length(f_fitmlp_usmL$mean)) {
if (f_fitmlp_usmL$mean[i]<0.0) f_fitmlp_usmL$mean[i]=0.0
}
plot(us5, type = "l",xlim=c(1,163),ylim=c(-1,12))
lines(seq(74,163,1),f_fitmlp_usmL$mean,col = "blue")
# rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()
for( i in 1:(73-(trainingSize + horizon) + 1))
{
usp = ts(us5[i:(i+trainingSize)])
usR = us6L[i:(i+trainingSize+horizon),]
fitmlp_us6 = mlp(usp,rep=30,xreg=usR)
forecasts = forecast(fitmlp_us6,h = horizon,xreg=usR)
ASE = mean((us5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$mean)^2)
ASEHolder[i] = ASE
}
ASEHolder # taking 10 mins to run
## [1] 2.4239388 1.5049129 2.2006868 4.1302711 0.7919529 0.9542558 1.3803651
## [8] 1.1102737 1.6147460 0.8132092 2.5351754 0.8650088 2.1077007 0.5737346
## [15] 1.9161855 1.7809161 9.3917114
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5737 0.9543 1.6147 2.1232 2.2007 9.3917
WindowedASE #1.949447
## [1] 2.123238
#short term
ensemble_us = (f_fitmlp_usm$mean + preds_us4_var1$fcst$posrate[,1])/2
plot(us5, type = "l",xlim=c(1,81),ylim=c(1,12))
lines(seq(74,80,1),ensemble_us,col = "blue")
# long term
ensemble_usL = (f_fitmlp_usmL$mean + preds_us4L_var1$fcst$posrate[,1]+ ensemble_usL_uni)/3
plot(us5, type = "l",xlim=c(1,163),ylim=c(-1,12))
lines(seq(74,163,1),ensemble_usL,col = "blue")
*****************start to look into Illinois data *********************************
IL<-read.csv("/Data Folder/University/SMU/R for Time Series/Project/IL_daily.csv",header=T)
head(IL,2)
## date state positive negative pending hospitalizedCurrently
## 1 20200715 IL 157825 1922908 NA 1454
## 2 20200714 IL 156638 1885934 NA 1416
## hospitalizedCumulative inIcuCurrently inIcuCumulative onVentilatorCurrently
## 1 NA 324 NA 130
## 2 NA 333 NA 126
## onVentilatorCumulative recovered dataQualityGrade lastUpdateEt
## 1 NA NA A 7/15/2020 0:00
## 2 NA NA A 7/14/2020 0:00
## dateModified checkTimeEt death hospitalized dateChecked
## 1 2020-07-15T00:00:00Z 7/14/2020 20:00 7427 NA 2020-07-15T00:00:00Z
## 2 2020-07-14T00:00:00Z 7/13/2020 20:00 7419 NA 2020-07-14T00:00:00Z
## totalTestsViral positiveTestsViral negativeTestsViral positiveCasesViral
## 1 2079601 NA NA 156693
## 2 2041440 NA NA 155506
## deathConfirmed deathProbable fips positiveIncrease negativeIncrease total
## 1 7226 201 17 1187 36974 2080733
## 2 7218 201 17 707 27739 2042572
## totalTestResults totalTestResultsIncrease posNeg deathIncrease
## 1 2080733 38161 2080733 8
## 2 2042572 28446 2042572 25
## hospitalizedIncrease hash commercialScore
## 1 0 c26e2751113806a83fd89ca8d7ccce0bedb527d0 0
## 2 0 d735a7b1d033561a1bf7171d2483d999957e3e4a 0
## negativeRegularScore negativeScore positiveScore score grade
## 1 0 0 0 0 NA
## 2 0 0 0 0 NA
# to get to daily test positive count
plotts.wge(IL$positiveIncrease)
# exclude early days only sick people gets tested
IL1<-subset.data.frame(IL,date>20200313)
IL2<-subset.data.frame(IL,date>20200503)
plotts.wge(IL1$positiveIncrease)
summary.data.frame(us1)
## date states positive negative
## Min. :20200304 Min. :15.00 Min. : 757 Min. : 1180
## 1st Qu.:20200406 1st Qu.:56.00 1st Qu.: 380919 1st Qu.: 1615827
## Median :20200510 Median :56.00 Median :1324147 Median : 7543696
## Mean :20200496 Mean :54.93 Mean :1320544 Mean :11839006
## 3rd Qu.:20200612 3rd Qu.:56.00 3rd Qu.:2037000 3rd Qu.:20403963
## Max. :20200715 Max. :56.00 Max. :3478419 Max. :39042608
##
## pending hospitalizedCurrently hospitalizedCumulative inIcuCurrently
## Min. : 103 Min. : 325 Min. : 4 Min. : 1299
## 1st Qu.: 1894 1st Qu.:29749 1st Qu.: 49362 1st Qu.: 5627
## Median : 2758 Median :37750 Median :156192 Median : 7497
## Mean : 8594 Mean :37465 Mean :137644 Mean : 8580
## 3rd Qu.: 4202 3rd Qu.:51020 3rd Qu.:223427 3rd Qu.:12166
## Max. :65709 Max. :59539 Max. :269507 Max. :15130
## NA's :13 NA's :22
## inIcuCumulative onVentilatorCurrently onVentilatorCumulative recovered
## Min. : 74 Min. : 167 Min. : 39.0 Min. : 97
## 1st Qu.: 2370 1st Qu.:2197 1st Qu.: 227.0 1st Qu.: 92978
## Median : 7319 Median :3749 Median : 638.5 Median : 294312
## Mean : 6388 Mean :3655 Mean : 616.3 Mean : 400402
## 3rd Qu.: 9665 3rd Qu.:5187 3rd Qu.: 880.2 3rd Qu.: 680916
## Max. :12002 Max. :7070 Max. :1166.0 Max. :1075882
## NA's :21 NA's :21 NA's :28 NA's :21
## dateChecked death hospitalized lastModified
## Length:134 Min. : 16 Min. : 4 Length:134
## Class :character 1st Qu.: 12080 1st Qu.: 49362 Class :character
## Mode :character Median : 74493 Median :156192 Mode :character
## Mean : 64541 Mean :137644
## 3rd Qu.:108200 3rd Qu.:223427
## Max. :129595 Max. :269507
##
## total totalTestResults posNeg deathIncrease
## Min. : 2040 Min. : 1937 Min. : 1937 Min. : 1.0
## 1st Qu.: 2013846 1st Qu.: 1996746 1st Qu.: 1996746 1st Qu.: 412.2
## Median : 8870918 Median : 8867843 Median : 8867843 Median : 854.5
## Mean :13168144 Mean :13159550 Mean :13159550 Mean : 967.0
## 3rd Qu.:22442754 3rd Qu.:22440962 3rd Qu.:22440962 3rd Qu.:1438.0
## Max. :42523974 Max. :42521027 Max. :42521027 Max. :2740.0
##
## hospitalizedIncrease negativeIncrease positiveIncrease
## Min. :-2841.0 Min. : 464 Min. : 141
## 1st Qu.: 982.5 1st Qu.:107276 1st Qu.:19789
## Median : 1621.5 Median :280023 Median :24760
## Mean : 2011.2 Mean :291361 Mean :25954
## 3rd Qu.: 2652.2 3rd Qu.:457737 3rd Qu.:31015
## Max. :17320.0 Max. :756730 Max. :66645
##
## totalTestResultsIncrease hash
## Min. : 617 Length:134
## 1st Qu.:136983 Class :character
## Median :303788 Mode :character
## Mean :317315
## 3rd Qu.:478845
## Max. :823375
##
il_dpos <- IL1$positiveIncrease
length(il_dpos)
## [1] 124
#select all possible correlated variables for daily test positive rate
library(tidyverse)
date = rev(IL2$date)
hospitalized=rev(IL2$hospitalizedCurrently)
inIcu=rev(IL2$inIcuCurrently)
onVent=rev(IL2$onVentilatorCurrently)
recovered=rev(IL2$recovered)
deathIncrease=rev(IL2$deathIncrease)
negativeIncrease=rev(IL2$negativeIncrease)
hospitalizedIncrease=rev(IL2$hospitalizedIncrease)
pending=rev(IL2$pending)
IL3 = data.frame (hospitalized,inIcu,onVent,deathIncrease,negativeIncrease,hospitalizedIncrease)
IL3$posrate <- rev((IL2$positiveIncrease/IL2$totalTestResultsIncrease)*100)
library(GGally)
ggpairs(IL3) # too busy, looks many high corr, make a subset of IL3
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
IL3_corr = data.frame(posrate=IL3$posrate,hospitalized=IL3$hospitalized,inIcu=IL3$inIcu,onVent=IL3$onVent,deathIncrease=IL3$deathIncrease,negativeIncrease=IL3$negativeIncrease)
ggpairs(IL3_corr)
#hospitalizedCurrently 0.91
#deathIncrease 0.634
#inIcuCurrently 0.882
#onVent 0.872
#negativeIncrease -0.654
# we'll move forward for multivariant analysis factor these 5 variables
# clean dataset ready for modeling analaysis
IL4 = IL3_corr
summary(IL4)
## posrate hospitalized inIcu onVent
## Min. : 1.821 Min. :1326 Min. : 304.0 Min. :126.0
## 1st Qu.: 2.814 1st Qu.:1560 1st Qu.: 399.0 1st Qu.:216.0
## Median : 4.013 Median :2496 Median : 713.0 Median :437.0
## Mean : 6.067 Mean :2760 Mean : 733.9 Mean :427.4
## 3rd Qu.: 9.060 3rd Qu.:3914 3rd Qu.:1031.0 3rd Qu.:607.0
## Max. :16.922 Max. :4862 Max. :1266.0 Max. :780.0
## deathIncrease negativeIncrease
## Min. : 0.00 Min. :11017
## 1st Qu.: 25.00 1st Qu.:18561
## Median : 57.00 Median :21912
## Mean : 65.88 Mean :22810
## 3rd Qu.: 96.00 3rd Qu.:27039
## Max. :198.00 Max. :37940
length(IL4$posrate)#73
## [1] 73
is.na.data.frame(IL4)# no missing value
## posrate hospitalized inIcu onVent deathIncrease negativeIncrease
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE FALSE FALSE FALSE
## [20,] FALSE FALSE FALSE FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE FALSE FALSE FALSE
## [25,] FALSE FALSE FALSE FALSE FALSE FALSE
## [26,] FALSE FALSE FALSE FALSE FALSE FALSE
## [27,] FALSE FALSE FALSE FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE FALSE FALSE
## [32,] FALSE FALSE FALSE FALSE FALSE FALSE
## [33,] FALSE FALSE FALSE FALSE FALSE FALSE
## [34,] FALSE FALSE FALSE FALSE FALSE FALSE
## [35,] FALSE FALSE FALSE FALSE FALSE FALSE
## [36,] FALSE FALSE FALSE FALSE FALSE FALSE
## [37,] FALSE FALSE FALSE FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE FALSE FALSE FALSE
## [39,] FALSE FALSE FALSE FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE FALSE FALSE FALSE
## [41,] FALSE FALSE FALSE FALSE FALSE FALSE
## [42,] FALSE FALSE FALSE FALSE FALSE FALSE
## [43,] FALSE FALSE FALSE FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE FALSE FALSE FALSE
## [45,] FALSE FALSE FALSE FALSE FALSE FALSE
## [46,] FALSE FALSE FALSE FALSE FALSE FALSE
## [47,] FALSE FALSE FALSE FALSE FALSE FALSE
## [48,] FALSE FALSE FALSE FALSE FALSE FALSE
## [49,] FALSE FALSE FALSE FALSE FALSE FALSE
## [50,] FALSE FALSE FALSE FALSE FALSE FALSE
## [51,] FALSE FALSE FALSE FALSE FALSE FALSE
## [52,] FALSE FALSE FALSE FALSE FALSE FALSE
## [53,] FALSE FALSE FALSE FALSE FALSE FALSE
## [54,] FALSE FALSE FALSE FALSE FALSE FALSE
## [55,] FALSE FALSE FALSE FALSE FALSE FALSE
## [56,] FALSE FALSE FALSE FALSE FALSE FALSE
## [57,] FALSE FALSE FALSE FALSE FALSE FALSE
## [58,] FALSE FALSE FALSE FALSE FALSE FALSE
## [59,] FALSE FALSE FALSE FALSE FALSE FALSE
## [60,] FALSE FALSE FALSE FALSE FALSE FALSE
## [61,] FALSE FALSE FALSE FALSE FALSE FALSE
## [62,] FALSE FALSE FALSE FALSE FALSE FALSE
## [63,] FALSE FALSE FALSE FALSE FALSE FALSE
## [64,] FALSE FALSE FALSE FALSE FALSE FALSE
## [65,] FALSE FALSE FALSE FALSE FALSE FALSE
## [66,] FALSE FALSE FALSE FALSE FALSE FALSE
## [67,] FALSE FALSE FALSE FALSE FALSE FALSE
## [68,] FALSE FALSE FALSE FALSE FALSE FALSE
## [69,] FALSE FALSE FALSE FALSE FALSE FALSE
## [70,] FALSE FALSE FALSE FALSE FALSE FALSE
## [71,] FALSE FALSE FALSE FALSE FALSE FALSE
## [72,] FALSE FALSE FALSE FALSE FALSE FALSE
## [73,] FALSE FALSE FALSE FALSE FALSE FALSE
# IL4 contains all the dta required for rest of model building and analysis
#Create a line chart for daily test positive and death count
# get the range for the x and y axis
xt=c(1:length(IL1$date))
xrange <- range(xt)
# get the test positive daily count in oldest first order
il_dposr<-rev(il_dpos)
yrange <- range(il_dposr)
# get the daily death count
il_dd <- IL1$deathIncrease
il_ddr<-rev(il_dd)
plotts.wge(il_ddr)
#il_ddr = c(il_ddr,rep('na',10))
# plot the graph
plot(xt, il_dposr, type="o", lwd=0.5,
lty=1, col='blue', pch=15,cex=0.4,xlab="Date (March 14 to July 15)", ylab="Daily Test Positive / Death Count")
lines(xt, il_ddr, type="o", lwd=0.5,
lty=2, col='red', pch=16,cex=0.4)
# add a title and subtitle
title("Daily State of Illinois Test Positive & Death Count")
# add a legend
legend(100, yrange[2], legend=c("Positive", "Death"), cex=0.8, col=c("blue","red"), lty=1:2)
#Create a line chart for daily test positive and death rate
# get the range for the x and y axis
xt=c(1:length(IL1$date))
xrange <- range(xt)
# get the test positive rate
dposrate_il1 <- IL1$positiveIncrease/(IL1$positiveIncrease+IL1$negativeIncrease)
dposrater_il1<-rev(dposrate_il1)
yrange <- range(dposrater_il1)
# get the death to positive rate
ddrate_il1 <- IL1$deathIncrease/IL1$positiveIncrease
ddrater_il1<-rev(ddrate_il1)
# plot the graph
plot(xt, dposrater_il1, type="o", lwd=0.5,
lty=1, col='blue', pch=15,cex=0.4,xlab="Date (March 13 to July 15)", ylab="Daily Test Positive / Death Rate")
lines(xt, ddrater_il1, type="o", lwd=0.5,
lty=2, col='red', pch=16,cex=0.4)
# add a title and subtitle
title("Daily Illinois Test Positive & Death Rate")
# add a legend
legend(100, yrange[2], legend=c("Positive", "Death"), cex=0.8, col=c("blue","red"), lty=1:2)
#$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
plotts.sample.wge(il_dposr)
## $autplt
## [1] 1.000000000 0.796412113 0.806828728 0.736799043 0.725236670 0.700315319
## [7] 0.731508393 0.674156043 0.655915570 0.622998072 0.555249208 0.524297883
## [13] 0.477974050 0.453430523 0.433211698 0.385245848 0.353060413 0.301348967
## [19] 0.262968313 0.234679039 0.214123077 0.210215197 0.130791558 0.068548606
## [25] 0.039632961 0.001135416
##
## $freq
## [1] 0.008064516 0.016129032 0.024193548 0.032258065 0.040322581 0.048387097
## [7] 0.056451613 0.064516129 0.072580645 0.080645161 0.088709677 0.096774194
## [13] 0.104838710 0.112903226 0.120967742 0.129032258 0.137096774 0.145161290
## [19] 0.153225806 0.161290323 0.169354839 0.177419355 0.185483871 0.193548387
## [25] 0.201612903 0.209677419 0.217741935 0.225806452 0.233870968 0.241935484
## [31] 0.250000000 0.258064516 0.266129032 0.274193548 0.282258065 0.290322581
## [37] 0.298387097 0.306451613 0.314516129 0.322580645 0.330645161 0.338709677
## [43] 0.346774194 0.354838710 0.362903226 0.370967742 0.379032258 0.387096774
## [49] 0.395161290 0.403225806 0.411290323 0.419354839 0.427419355 0.435483871
## [55] 0.443548387 0.451612903 0.459677419 0.467741935 0.475806452 0.483870968
## [61] 0.491935484 0.500000000
##
## $db
## [1] 16.23481380 7.36887552 0.22486668 -0.11267743 0.06735102
## [6] -13.62918899 -3.31458032 -6.74445173 -12.86310627 -7.74797440
## [11] -9.55876424 -3.76255134 -5.33791653 -3.99314378 -6.57628793
## [16] -9.22084880 -3.35943839 -0.62576095 -11.04884186 -6.56841694
## [21] -18.14877986 -10.40638288 -8.62954959 -3.22832414 -8.08238954
## [26] -13.75991211 -11.95887210 -12.83400687 -5.24105725 -12.92896551
## [31] -9.84344495 -17.49412875 -14.00836477 -16.11830620 -9.74842872
## [36] -6.83788694 -6.85477339 -11.46349798 -12.09418720 -8.48971579
## [41] -4.77777497 -3.15833709 -11.28433776 -18.55689837 -4.48439570
## [46] -3.83873525 -10.56869637 -9.52313421 -15.52203060 -8.83351834
## [51] -16.13232064 -16.99545064 -5.24452238 -4.45771217 -9.05110156
## [56] -3.87069848 -9.23999783 -4.18557574 -3.94053536 -4.94288821
## [61] -6.58848732 -2.59515508
##
## $dbz
## [1] 10.5285511 9.9868452 9.0770422 7.7901591 6.1183074 4.0654590
## [7] 1.6765936 -0.8912392 -3.2529921 -4.8378030 -5.4016026 -5.2839142
## [13] -4.9068159 -4.5068098 -4.2035700 -4.0657608 -4.1275011 -4.3909252
## [19] -4.8289712 -5.3898182 -6.0043338 -6.5998327 -7.1207948 -7.5483623
## [25] -7.9046121 -8.2367144 -8.5915933 -8.9944633 -9.4362537 -9.8698375
## [31] -10.2175868 -10.3955513 -10.3503432 -10.0855265 -9.6552199 -9.1337358
## [37] -8.5881618 -8.0669925 -7.6012936 -7.2111066 -6.9121571 -6.7202143
## [43] -6.6519405 -6.7221359 -6.9377136 -7.2883434 -7.7335664 -8.1895519
## [49] -8.5299234 -8.6260644 -8.4229759 -7.9778688 -7.4117763 -6.8355375
## [55] -6.3162234 -5.8807002 -5.5309979 -5.2583496 -5.0526791 -4.9073770
## [61] -4.8200636 -4.7908404
#damping faster compares to us
# try to work on positive count, rather than rate
aic5.wge(il_dposr,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 28 5 2 11.96898
## 35 6 4 11.96924
## 40 7 4 11.97535
## 45 8 4 11.97569
## 31 6 0 11.98079
# pick 5,2, AIC 11.96
aic5.wge(il_dposr,p=0:10,q=0:4, type='bic')
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of bic
## p q bic
## 7 1 1 12.08159
## 11 2 0 12.10572
## 12 2 1 12.10711
## 8 1 2 12.11501
## 13 2 2 12.12728
# pick 1, 1? BIC=12.08
# estimate model phi and theta
il_estaic_ct <- est.arma.wge(il_dposr, p=5, q=2, factor = T)
##
## Coefficients of Original polynomial:
## 1.9096 -0.9278 -0.3548 0.3945 -0.0321
##
## Factor Roots Abs Recip System Freq
## 1-0.9744B 1.0262 0.9744 0.0000
## 1-1.3814B+0.6814B^2 1.0136+-0.6634i 0.8255 0.0922
## 1+0.5365B -1.8640 0.5365 0.5000
## 1-0.0902B 11.0805 0.0902 0.0000
##
##
f_il_dposr_ct <- fore.aruma.wge(il_dposr, phi = il_estaic_ct$phi,theta = il_estaic_ct$theta, n.ahead = 20, lastn= T, limits=F )
# ASE
ase_il_dposr_ct = mean((f_il_dposr_ct$f - il_dposr[length(f_il_dposr_ct$f)-20+1:length(f_il_dposr_ct$f)])^2)
ase_il_dposr_ct
## [1] 365406.4
# ASE 365406.4, too large, try to take a diff?
# complete the model, test residual
ljung.wge(f_il_dposr_ct$resid,p=5,q=2)
## Obs -0.01147759 0.01388252 0.0416049 -0.08473251 -0.07442198 0.1568715 -0.05467255 0.01164759 0.09721603 -0.05203082 0.02598788 -0.006662076 0.02082578 0.1139352 0.05008717 0.03741986 0.003962046 -0.03226451 -0.0615633 0.06745327 0.2628991 -0.02177301 -0.1217265 0.04606653
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 24.40802
##
## $df
## [1] 17
##
## $pval
## [1] 0.1087659
# pval = 0.1087659
plotts.wge(f_il_dposr_ct$res)
acf(f_il_dposr_ct$resid)
# forecast out
f_il_dposr_ct_7 <- fore.aruma.wge(il_dposr, phi = il_estaic_ct$phi,theta = il_estaic_ct$theta, n.ahead = 7, lastn= F, limits=T )
f_il_dposr_ct_90 <- fore.aruma.wge(il_dposr, phi = il_estaic_ct$phi,theta = il_estaic_ct$theta, n.ahead = 90, lastn= F, limits=T )
# decide not to work on count
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
il_dposr_d1<- artrans.wge(il_dposr,phi.tr = 1)
#second lag is large
aic5.wge(il_dposr_d1,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 4 4
## Five Smallest Values of aic
## p q aic
## 18 3 2 11.95551
## 30 5 4 11.96930
## 23 4 2 11.97286
## 19 3 3 11.97557
## 24 4 3 11.97827
# pick 3,2, AIC 11.95
aic5.wge(il_dposr_d1,p=0:10,q=0:4, type='bic')
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 4 4
## Five Smallest Values of bic
## p q bic
## 2 0 1 12.06537
## 18 3 2 12.09269
## 7 1 1 12.09311
## 3 0 2 12.09633
## 6 1 0 12.11201
# pick 0, 1 BIC=12.06
# estimate model phi and theta
il_estaic_d1_ct <- est.arma.wge(il_dposr_d1, p=3, q=2, factor = T)
##
## Coefficients of Original polynomial:
## 0.9036 -0.0164 -0.3605
##
## Factor Roots Abs Recip System Freq
## 1-1.4044B+0.7198B^2 0.9755+-0.6615i 0.8484 0.0948
## 1+0.5008B -1.9967 0.5008 0.5000
##
##
f_il_dposr_d1_ct <- fore.aruma.wge(il_dposr, d=1, phi = il_estaic_d1_ct$phi,theta = il_estaic_d1_ct$theta, n.ahead = 20, lastn= T, limits=F )
# ASE
ase_il_dposr_d1_ct = mean((f_il_dposr_d1_ct$f - il_dposr_d1[length(f_il_dposr_d1_ct$f)-20+1:length(f_il_dposr_d1_ct$f)])^2)
ase_il_dposr_d1_ct
## [1] 599938.6
# ASE 599938.6, much larger, no go
# complete the model, test residual
ljung.wge(f_il_dposr_d1_ct$resid,p=3,q=2)
## Obs 0.001437668 0.02545517 0.0381145 -0.05838254 -0.05299002 0.1743374 -0.03713574 0.02233283 0.1003982 -0.04422496 0.02628858 -0.006758183 0.02240596 0.1133096 0.05248398 0.03981525 0.005025657 -0.03243836 -0.05456907 0.07989902 0.2716167 -0.01212245 -0.1154252 0.04202591
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 24.72814
##
## $df
## [1] 19
##
## $pval
## [1] 0.1696867
# pval = 0.1711935
plotts.wge(f_il_dposr_d1_ct$res)
acf(f_il_dposr_d1_ct$resid)
# forecast out
f_il_dposr_d1_ct_7 <- fore.aruma.wge(il_dposr, d=1, phi = il_estaic_d1_ct$phi,theta = il_estaic_d1_ct$theta, n.ahead = 7, lastn= F, limits=F )
f_il_dposr_d1_ct_90 <- fore.aruma.wge(il_dposr, d=1, phi = il_estaic_d1_ct$phi,theta = il_estaic_d1_ct$theta, n.ahead = 90, lastn= F, limits=F )
# decide not to work on count
############################### start using from May 4th data ###############################
#IL4 is the dataset ARMA(12,3)
plotts.sample.wge(IL4$posrate)
## $autplt
## [1] 1.00000000 0.84706216 0.83128805 0.76463795 0.72652098 0.66806587
## [7] 0.65656404 0.58804632 0.58844427 0.52522989 0.51461061 0.45186464
## [13] 0.40834268 0.40542888 0.37714113 0.35114251 0.31280904 0.27569139
## [19] 0.23423800 0.20997194 0.13705635 0.11281560 0.05964379 0.03460459
## [25] 0.02280035 0.01104549
##
## $freq
## [1] 0.01369863 0.02739726 0.04109589 0.05479452 0.06849315 0.08219178
## [7] 0.09589041 0.10958904 0.12328767 0.13698630 0.15068493 0.16438356
## [13] 0.17808219 0.19178082 0.20547945 0.21917808 0.23287671 0.24657534
## [19] 0.26027397 0.27397260 0.28767123 0.30136986 0.31506849 0.32876712
## [25] 0.34246575 0.35616438 0.36986301 0.38356164 0.39726027 0.41095890
## [31] 0.42465753 0.43835616 0.45205479 0.46575342 0.47945205 0.49315068
##
## $db
## [1] 13.4574629 5.1107843 3.3822758 0.6611536 1.1800172 -3.2278316
## [7] -5.4642859 -2.6045214 -1.2788882 -7.2501220 -3.5425945 -13.6509335
## [13] -8.6392366 -11.8799891 -4.1894316 -7.7959631 -10.9367525 -11.7157964
## [19] -11.3206333 -13.0915675 -4.9168784 -17.5880935 -5.6019065 -11.6479085
## [25] -8.4477905 -21.5028852 -3.8501267 -6.9379366 -7.2994506 -11.0910130
## [31] -17.8571548 -15.8191554 -12.0724100 -8.6386645 -3.4869624 -5.4841390
##
## $dbz
## [1] 9.4412192 8.5133298 6.9713549 4.8515255 2.2895332 -0.3637689
## [7] -2.5643194 -4.0446708 -5.0606566 -5.8974958 -6.6385691 -7.3016628
## [13] -7.8956921 -8.3949553 -8.7882797 -9.1353135 -9.4903053 -9.7860929
## [19] -9.8711977 -9.7046024 -9.4257392 -9.1881866 -9.0243456 -8.8693115
## [25] -8.6712053 -8.4748535 -8.4006542 -8.5580256 -8.9616021 -9.4489462
## [31] -9.6451720 -9.2156500 -8.2827254 -7.2683189 -6.4866336 -6.0746887
il_dposrater = IL4$posrate
# try ARMA first
aic5.wge(il_dposrater,p=0:16,q=0:4)
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 13 4
## Five Smallest Values of aic
## p q aic
## 64 12 3 0.5271353
## 72 14 1 0.5672285
## 63 12 2 0.5851050
## 61 12 0 0.5896450
## 81 16 0 0.6073353
# pick 12,3, AIC 0.527
# estimate model phi and theta
il_estaic <- est.arma.wge(il_dposrater, p=12, q=3, factor = T)
##
## Coefficients of Original polynomial:
## -0.1173 0.1314 -0.0044 0.5008 0.3371 0.5697 0.0008 -0.0157 0.0213 -0.0607 0.0493 -0.4584
##
## Factor Roots Abs Recip System Freq
## 1+1.9460B+0.9740B^2 -0.9989+-0.1697i 0.9869 0.4732
## 1-0.5607B+0.9725B^2 0.2883+-0.9722i 0.9862 0.2041
## 1-1.9185B+0.9203B^2 1.0423+-0.0145i 0.9593 0.0022
## 1+1.2581B+0.8601B^2 -0.7313+-0.7923i 0.9274 0.3686
## 1+0.5071B+0.8318B^2 -0.3048+-1.0532i 0.9120 0.2948
## 1-1.1147B+0.7350B^2 0.7583+-0.8863i 0.8573 0.1374
##
##
f_il_dposr <- fore.aruma.wge(il_dposrater, d=0, phi = il_estaic$phi,theta = il_estaic$theta, n.ahead = 20, lastn= T, limits=F )
# ASE
ase_il_dposr = mean((f_il_dposr$f - il_dposrater[(length(il_dposrater)-20+1):length(il_dposrater)])^2)
ase_il_dposr
## [1] 0.7890908
# ASE 0.7890908
# complete the model, test residual
ljung.wge(f_il_dposr$resid,p=12,q=3)
## Obs -0.063869 -0.1135724 0.008518975 -0.07065248 -0.2019947 -0.07951013 0.07520751 -0.1340275 0.02708842 -0.03805784 -0.02526089 0.110918 -0.06004233 0.02460496 0.08762631 0.02826428 0.01172655 0.05007233 0.06996003 -0.09591883 0.0437517 -0.09940294 -0.05619927 -0.0982514
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 14.42298
##
## $df
## [1] 9
##
## $pval
## [1] 0.1080547
# pval = 0.1080547 white noise
plotts.wge(f_il_dposr$res)
acf(f_il_dposr$resid)
# forecast out
f_il_dposr_7 <- fore.aruma.wge(il_dposrater, d=0, phi = il_estaic$phi,theta = il_estaic$theta, n.ahead = 7, lastn= F, limits=T )
f_il_dposr_90 <- fore.aruma.wge(il_dposrater, d=0, phi = il_estaic$phi,theta = il_estaic$theta, n.ahead = 90, lastn= F, limits=T )
IL5 = ts(IL4$posrate)
#rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()
for( i in 1:(73-(trainingSize + horizon) + 1))
{
forecasts = fore.aruma.wge(il_dposrater[i:(i+(trainingSize-1))], d=0, phi = il_estaic$phi,theta = il_estaic$theta, n.ahead = horizon)
ASE = mean((IL5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$f)^2)
ASEHolder[i] = ASE
}
ASEHolder # taking 10 mins to run
## [1] 0.36411950 0.22329315 0.50961062 0.44589024 0.46011665 0.13418412
## [7] 0.41841939 0.19013453 0.15894212 0.18353181 0.08915786 0.24691020
## [13] 0.39616815 0.70155439 0.57833722 0.62492572 0.40855099
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.08916 0.19013 0.39617 0.36081 0.46012 0.70155
WindowedASE #0.3608145
## [1] 0.3608145
#diff data (ARIMA(10,2,0))
il_dposrater_d1 <- artrans.wge(il_dposrater,phi.tr = 1)
aic5.wge(il_dposrater_d1,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 38 7 2 0.6273992
## 43 8 2 0.6357281
## 48 9 2 0.6393444
## 53 10 2 0.6606535
## 7 1 1 0.6943154
#pick 7,2, aic=0.627
# estimate model phi and theta
il_estaic_d1 <- est.arma.wge(il_dposrater_d1, p=7, q=2, factor = T)
##
## Coefficients of Original polynomial:
## -0.9598 0.2974 0.4454 0.2274 0.0791 0.3177 0.3571
##
## Factor Roots Abs Recip System Freq
## 1-0.9684B 1.0326 0.9684 0.0000
## 1+1.8857B+0.9057B^2 -1.0410+-0.1429i 0.9517 0.4783
## 1+0.8562B+0.6956B^2 -0.6154+-1.0290i 0.8340 0.3358
## 1-0.8138B+0.5852B^2 0.6953+-1.1070i 0.7650 0.1607
##
##
# do another diff
il_dposrater_d2 <- artrans.wge(il_dposrater_d1,phi.tr = 1)
aic5.wge(il_dposrater_d2,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 51 10 0 0.5817741
## 36 7 0 0.8473765
## 31 6 0 0.8482152
## 41 8 0 0.8549106
## 46 9 0 0.8832552
il_estaic_d2 <- est.arma.wge(il_dposrater_d2, p=10, q=0, factor = T)
##
## Coefficients of Original polynomial:
## -1.7663 -2.0598 -2.1473 -2.1295 -2.1472 -1.7187 -1.4115 -1.1591 -0.8642 -0.4916
##
## Factor Roots Abs Recip System Freq
## 1+1.8872B+0.9225B^2 -1.0228+-0.1943i 0.9605 0.4701
## 1+1.2660B+0.8951B^2 -0.7072+-0.7856i 0.9461 0.3667
## 1-0.6150B+0.8627B^2 0.3565+-1.0159i 0.9288 0.1963
## 1+0.5744B+0.8524B^2 -0.3369+-1.0294i 0.9232 0.3003
## 1-1.3462B+0.8096B^2 0.8314+-0.7375i 0.8998 0.1155
##
##
f_il_dposr_d2 <- fore.aruma.wge(il_dposrater, d=2, phi = il_estaic_d2$phi,theta = il_estaic_d2$theta, n.ahead = 20, lastn= T, limits=F )
# ASE
ase_il_dposr_d2 = mean((f_il_dposr_d2$f - il_dposrater[(length(il_dposrater)-20+1):length(il_dposrater)])^2)
ase_il_dposr_d2
## [1] 0.3281278
# ASE 30.39611
# complete the model, test residual
ljung.wge(f_il_dposr_d2$resid,p=10,q=0)
## Obs 0.09210403 -0.07900541 0.139213 -0.1145658 -0.2052481 -0.1140479 0.04034315 -0.2857225 -0.1541804 -0.1375917 -0.06348626 0.1214589 -0.01428639 0.2038236 0.1721987 -0.03289784 -0.02268816 0.2011555 0.04370681 -0.06099152 0.03421798 -0.1612467 -0.1604852 -0.09692866
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 38.72615
##
## $df
## [1] 14
##
## $pval
## [1] 0.0004019996
# pval = 0.000402, NOT white noise
plotts.wge(f_il_dposr_d2$res)
acf(f_il_dposr_d2$resid)
# forecast out
f_il_dposr_d2_7 <- fore.aruma.wge(il_dposrater, d=2, phi = il_estaic_d2$phi,theta = il_estaic_d2$theta, n.ahead = 7, lastn= F, limits=T )
f_il_dposr_d2_90 <- fore.aruma.wge(il_dposrater, d=2, phi = il_estaic_d2$phi,theta = il_estaic_d2$theta, n.ahead = 90, lastn= F, limits=T )
#rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()
for( i in 1:(73-(trainingSize + horizon) + 1))
{
forecasts = fore.aruma.wge(il_dposrater[i:(i+(trainingSize-1))], d=2, phi = il_estaic_d2$phi,theta = il_estaic_d2$theta, n.ahead = horizon)
ASE = mean((IL5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$f)^2)
ASEHolder[i] = ASE
}
ASEHolder # taking 10 mins to run
## [1] 0.56210206 0.14608297 0.15136368 0.16864294 0.15309334 0.08232689
## [7] 0.26140242 0.06814249 0.04551926 0.08659063 0.08965176 0.32734587
## [13] 0.64508697 0.79941523 0.66215248 0.82064134 0.66171876
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.04552 0.08965 0.16864 0.33713 0.64509 0.82064
WindowedASE #0.3371341
## [1] 0.3371341
VAR, MLP, Rolling ASE for IL
#IL4 is the dataset
#hospitalizedCurrently 0.91
#deathIncrease 0.634
#inIcuCurrently 0.882
#onVent 0.872
#negativeIncrease -0.654
# we'll move forward for multi variants analysis factor these 3 variables
ccf(IL4$posrate,IL4$hospitalized)
ccf(IL4$posrate,IL4$deathIncrease)
ccf(IL4$posrate,IL4$inIcu)
ccf(IL4$posrate,IL4$onVent)
ccf(IL4$posrate,IL4$negativeIncrease)
IL4_try4=data.frame(posrate=IL4$posrate,hospitalized=IL4$hospitalized)
IL4_try2=data.frame(posrate=IL4$posrate,deathIncrease=IL4$deathIncrease)
IL4_try3=data.frame(posrate=IL4$posrate,inIcu=IL4$inIcu,onVent=IL4$onVent)
IL4_try5=data.frame(posrate=IL4$posrate,negativeIncrease=IL4$negativeIncrease)
set.seed(1)
VARselect(IL4_try4,type='both',lag.max=16) #7, many Infs
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 15 1 1 15
##
## $criteria
## 1 2 3 4 5
## AIC(n) 9.628964 9.630885 9.613715 9.622478 9.657919
## HQ(n) 9.740402 9.798043 9.836592 9.901075 9.992235
## SC(n) 9.915708 10.061001 10.187203 10.339338 10.518151
## FPE(n) 15205.700180 15251.767560 15024.615986 15211.606633 15846.465970
## 6 7 8 9 10
## AIC(n) 9.694604 9.526542 9.519532 9.526003 9.529141
## HQ(n) 10.084639 9.972296 10.021005 10.083196 10.142053
## SC(n) 10.698208 10.673518 10.809880 10.959723 11.106233
## FPE(n) 16565.992283 14150.567143 14245.940099 14592.594110 14965.172667
## 11 12 13 14 15
## AIC(n) 9.463028 9.370639 9.286496 9.239543 8.854342
## HQ(n) 10.131659 10.094989 10.066565 10.075332 9.745850
## SC(n) 11.183493 11.234475 11.293704 11.390124 11.148295
## FPE(n) 14395.934386 13572.270529 12994.666942 13022.355567 9397.974035
## 16
## AIC(n) 8.919704
## HQ(n) 9.866932
## SC(n) 11.357029
## FPE(n) 10769.465194
lsfit_IL4=VAR(IL4_try4, p=15, type ='both')################try to put few variables in xtrgen instead?
# short term forecast VAR US
preds_IL4=predict(lsfit_IL4,n.ahead=7)
preds_IL4$fcst$posrate[1:7,1]
## [1] 3.225589 4.788572 2.936862 4.454516 3.585940 3.802660 2.588652
# making graph for 7 ahead prediction
plot(IL4$posrate, type="o",xlim=c(1,81),ylim=c(1,15))
points(seq(74,80,1),preds_IL4$fcst$posrate[1:7,1], type='o',col="blue")
# long term forecast VAR US
preds_IL4L=predict(lsfit_IL4,n.ahead=90)
preds_IL4L$fcst$posrate[1:90,1]
## [1] 3.225589 4.788572 2.936862 4.454516 3.585940 3.802660 2.588652 3.219270
## [9] 3.275490 3.991660 3.955019 4.056987 3.401566 3.103426 3.956585 3.481761
## [17] 4.303974 3.731541 4.422370 3.600931 3.838421 3.216895 3.802075 3.752772
## [25] 4.220904 4.204332 3.918949 3.764884 3.644447 4.174983 3.996579 4.495958
## [33] 4.191006 4.407883 3.811853 4.267713 3.984396 4.393886 4.454797 4.668526
## [41] 4.507530 4.327197 4.443755 4.428838 4.808730 4.797175 5.071262 4.703751
## [49] 4.877480 4.644730 4.973739 4.924364 5.240909 5.209972 5.237510 5.122357
## [57] 5.105144 5.263664 5.315188 5.610734 5.530140 5.625867 5.378910 5.551237
## [65] 5.484604 5.742452 5.766311 5.924127 5.815645 5.808306 5.799436 5.844240
## [73] 6.007485 6.077398 6.223652 6.099318 6.152542 6.042116 6.205597 6.224035
## [81] 6.427576 6.414783 6.456755 6.385807 6.409186 6.465100 6.552707 6.693191
## [89] 6.717242 6.768358
# making graph for 90 ahead prediction
plot(IL4$posrate, type="o",xlim=c(1,164),ylim=c(1,16))
points(seq(74,163,1),preds_IL4L$fcst$posrate[1:90,1], type='o',col="blue")
# looks flat and a bit following the upwards trend
# Rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()
for( i in 1:(73-(trainingSize + horizon) + 1))
{
ILp = IL4[i:(i+trainingSize),]
lsfit_IL4 = VAR(ILp,p=6)
forecasts = predict(lsfit_IL4,n.ahead = horizon)
ASE = mean((IL4$posrate[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$fcst$posrate)^2)
ASEHolder[i] = ASE
}
ASEHolder
## [1] 15.391349 24.153240 29.111119 9.741131 9.606046 8.875742 7.274734
## [8] 7.724953 11.392253 10.752786 11.021295 7.644551 6.838913 6.531442
## [15] 6.281423 4.852618 4.097239
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.097 6.839 8.876 10.664 11.021 29.111
WindowedASE #10.66417
## [1] 10.66417
IL5 = ts(IL4$posrate)
library(nnfor)
set.seed(5)
fitmlp_ILu=mlp(IL5,rep=100)
# short term forecast MLP uni US
f_fitmlp_ILu=forecast(fitmlp_ILu,h=7)
plot(f_fitmlp_ILu)
# long term forecasst MLP uni us
f_fitmlp_ILuL=forecast(fitmlp_ILu,h=90)
plot(f_fitmlp_ILuL)
# do a better job at visulize the 90 day forecast
plot(IL5, type = "l",xlim=c(1,163),ylim=c(1,17))
lines(seq(74,163,1),f_fitmlp_ILuL$mean,col = "blue")
# rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()
for( i in 1:(73-(trainingSize + horizon) + 1))
{
ILp = ts(IL5[i:(i+trainingSize)])
fitmlp_IL5 = mlp(ILp,rep=25)
forecasts = forecast(fitmlp_IL5,h = horizon)
ASE = mean((IL5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$mean)^2)
ASEHolder[i] = ASE
}
ASEHolder # taking 10 mins to run
## [1] 0.22796774 0.21705668 0.16349143 0.08359748 0.09554394 0.04631433
## [7] 0.05058953 0.10353288 0.12407947 0.16129188 0.23508583 0.18022154
## [13] 0.86000455 0.62843868 0.79646606 0.67404134 0.34066257
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.04631 0.10353 0.18022 0.29343 0.34066 0.86000
WindowedASE #0.2934345
## [1] 0.2934345
#short term
ensemble_IL_uni = (f_fitmlp_ILu$mean + f_il_dposr_d2_7$f +f_il_dposr_7$f)/3
plot(IL5, type = "l",xlim=c(1,81),ylim=c(1,12))
lines(seq(74,80,1),ensemble_IL_uni,col = "blue")
#long term
ensemble_IL_uniL = (f_fitmlp_ILuL$mean + f_il_dposr_d2_90$f +f_il_dposr_90$f)/3
plot(IL5, type = "l",xlim=c(1,163),ylim=c(1,12))
lines(seq(74,163,1),ensemble_IL_uniL,col = "blue")
set.seed(2)
# forecast regressor value in order to MLP posrate forecast into the future......short term
hospitalized = ts(IL4$hospitalized)
fitmlp_hospitalized7=mlp(hospitalized,rep=30)
f_fitmlp_hospitalized7=forecast(fitmlp_hospitalized7,h=7)
plot(f_fitmlp_hospitalized7)
hospitalized7 = ts(c(IL4$hospitalized, f_fitmlp_hospitalized7$mean))
deathIncrease = ts(IL4$deathIncrease)
fitmlp_deathIncrease7=mlp(deathIncrease,rep=30)
f_fitmlp_deathIncrease7=forecast(fitmlp_deathIncrease7,h=7)
plot(f_fitmlp_deathIncrease7)
deathIncrease7 = ts(c(IL4$deathIncrease, f_fitmlp_deathIncrease7$mean))
inIcu = ts(IL4$inIcu)
fitmlp_inIcu7=mlp(inIcu,rep=30)
f_fitmlp_inIcu7=forecast(fitmlp_inIcu7,h=7)
plot(f_fitmlp_inIcu7)
inIcu7 = ts(c(IL4$inIcu, f_fitmlp_inIcu7$mean))
onVent = ts(IL4$onVent)
fitmlp_onVent7=mlp(onVent,rep=30)
f_fitmlp_onVent7=forecast(fitmlp_onVent7,h=7)
plot(f_fitmlp_onVent7)
onVent7 = ts(c(IL4$onVent, f_fitmlp_onVent7$mean))
negativeIncrease = ts(IL4$negativeIncrease)
fitmlp_negativeIncrease7=mlp(negativeIncrease,rep=30)
f_fitmlp_negativeIncrease7=forecast(fitmlp_negativeIncrease7,h=7)
plot(f_fitmlp_negativeIncrease7)
negativeIncrease7 = ts(c(IL4$negativeIncrease, f_fitmlp_negativeIncrease7$mean))
# setup regressor df
IL6_old = data.frame(hospitalized=hospitalized7,deathIncrease=deathIncrease7,inIcu=inIcu7,onVent=onVent7,negativeIncrease=negativeIncrease7)
# lon term forecast is over 100% half way, try to adjust xreg
IL6 = data.frame(hospitalized=hospitalized7,deathIncrease=deathIncrease7,inIcu=inIcu7,onVent=onVent7)
fitmlp_ILm=mlp(IL5,rep=30, xreg=IL6)
fitmlp_ILm
## MLP fit with 5 hidden nodes and 30 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (2,3,4)
## Forecast combined using the median operator.
## MSE: 0.2944.
f_fitmlp_ILm=forecast(fitmlp_ILm,h=7,xreg =IL6)
plot(f_fitmlp_ILm)
# forecast regressor value in order to MLP posrate forecast into the future.. long term
#hospitalized = ts(IL4$hospitalized)
#fitmlp_hospitalized7=mlp(hospitalized,rep=30)
f_fitmlp_hospitalized90=forecast(fitmlp_hospitalized7,h=90)
plot(f_fitmlp_hospitalized90)
hospitalized90 = ts(c(IL4$hospitalized, f_fitmlp_hospitalized90$mean))
#deathIncrease = ts(IL4$deathIncrease)
#fitmlp_deathIncrease7=mlp(deathIncrease,rep=30)
f_fitmlp_deathIncrease90=forecast(fitmlp_deathIncrease7,h=90)
plot(f_fitmlp_deathIncrease7)
deathIncrease90 = ts(c(IL4$deathIncrease, f_fitmlp_deathIncrease90$mean))
#inIcu = ts(IL4$inIcu)
#fitmlp_inIcu7=mlp(inIcu,rep=30)
f_fitmlp_inIcu90=forecast(fitmlp_inIcu7,h=90)
plot(f_fitmlp_inIcu90)
inIcu90 = ts(c(IL4$inIcu, f_fitmlp_inIcu90$mean))
#onVent = ts(IL4$onVent)
#fitmlp_onVent7=mlp(onVent,rep=30)
f_fitmlp_onVent90=forecast(fitmlp_onVent7,h=90)
plot(f_fitmlp_onVent90)
onVent90 = ts(c(IL4$onVent, f_fitmlp_onVent90$mean))
#negativeIncrease = ts(IL4$negativeIncrease)
#fitmlp_negativeIncrease7=mlp(negativeIncrease,rep=30)
f_fitmlp_negativeIncrease90=forecast(fitmlp_negativeIncrease7,h=90)
plot(f_fitmlp_negativeIncrease90)
negativeIncrease90 = ts(c(IL4$negativeIncrease, f_fitmlp_negativeIncrease90$mean))
# setup regressor df
IL6L_old = data.frame(hospitalized=hospitalized90,deathIncrease=deathIncrease90,inIcu=inIcu90,onVent=onVent90,negativeIncrease=negativeIncrease90)
IL6L = data.frame(hospitalized=hospitalized90,deathIncrease=deathIncrease90,inIcu=inIcu90,onVent=onVent90)
fitmlp_ILmL=mlp(IL5,rep=30, xreg=IL6L)
fitmlp_ILmL
## MLP fit with 5 hidden nodes and 30 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (2,3,4)
## Forecast combined using the median operator.
## MSE: 0.3589.
f_fitmlp_ILmL=forecast(fitmlp_ILmL,h=90,xreg =IL6L)
plot(f_fitmlp_ILmL)
# fix negative rate forecast, set to zero
for (i in 1:length(f_fitmlp_ILmL$mean)) {
if (f_fitmlp_ILmL$mean[i]<0.0) f_fitmlp_ILmL$mean[i]=0.0
}
# try plot the mean and original data, make the graph more visible. Otherwise too wide of different reps, make just a flat line.
plot(IL5, type = "l",xlim=c(1,163),ylim=c(0,15))
lines(seq(74,163,1),f_fitmlp_ILmL$mean,col = "blue")
# rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()
for( i in 1:(73-(trainingSize + horizon) + 1))
{
ILp = ts(IL5[i:(i+trainingSize)])
ILR = IL6[i:(i+trainingSize+horizon),]
fitmlp_IL6 = mlp(ILp,rep=20,xreg=ILR)
forecasts = forecast(fitmlp_IL6,h = horizon,xreg=ILR)
ASE = mean((IL5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$mean)^2)
ASEHolder[i] = ASE
}
ASEHolder # taking 10 mins to run
## [1] 0.09046104 0.63251903 0.31769679 0.42378685 0.13899786 0.39011539
## [7] 0.20221401 0.11334207 0.12992757 0.13298685 0.26756311 0.30200406
## [13] 0.68266885 0.34401345 0.32363498 0.35984946 0.34144762
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.09046 0.13900 0.31770 0.30548 0.35985 0.68267
WindowedASE #0.3054841
## [1] 0.3054841
#short term
ensemble_IL = (f_fitmlp_ILm$mean + preds_IL4$fcst$posrate[,1])/2
plot(IL5, type = "l",xlim=c(1,81),ylim=c(1,12))
lines(seq(74,80,1),ensemble_IL,col = "blue")
#long term
ensemble_ILL = (f_fitmlp_ILmL$mean + preds_IL4L$fcst$posrate[,1])/2
plot(IL5, type = "l",xlim=c(1,163),ylim=c(1,12))
lines(seq(74,163,1),ensemble_ILL,col = "blue")